1 // $Id: gc_datastore_export.cpp,v 1.78 2012/05/11 23:27:53 ewalkup Exp $
2
3 /*
4 Copyright 2002 Mary Kuhner, Jon Yamato, and Joseph Felsenstein
5
6 This software is distributed free of charge for non-commercial use
7 and is copyrighted. Of course, we do not guarantee that the software
8 works, and are not responsible for any damage you may cause or have.
9 */
10
11 #include <cassert>
12 //#include <iostream>
13 //using namespace std;
14
15 #include "gc_data.h"
16 #include "gc_data_missing_err.h"
17 #include "gc_datastore.h"
18 #include "gc_default.h"
19 #include "gc_errhandling.h"
20 #include "gc_exportable.h"
21 #include "gc_genotype_resolution.h"
22 #include "gc_individual.h"
23 #include "gc_individual_err.h"
24 #include "gc_parse_block.h"
25 #include "gc_parse_sample.h"
26 #include "gc_phase.h"
27 #include "gc_population.h"
28 #include "gc_locus.h"
29 #include "gc_locus_err.h"
30 #include "gc_region.h"
31 #include "gc_sequential_data.h"
32 #include "gc_strings.h"
33 #include "gc_strings_locus.h"
34 #include "gc_structure_maps.h"
35 #include "gc_structures.h"
36 #include "gc_trait.h"
37 #include "gc_types.h"
38 #include "tinyxml.h"
39 #include "xml_strings.h"
40 #include "wx/log.h"
41 #include "wx/string.h"
42
43 //------------------------------------------------------------------------------------
44
45 gcUnphasedMarkers *
CheckPhaseMarkers(const gcUnphasedMarkers * phaseInfo,wxString indName,const gcLocus & locusRef,bool anyZeroes) const46 GCDataStore::CheckPhaseMarkers( const gcUnphasedMarkers * phaseInfo,
47 wxString indName,
48 const gcLocus & locusRef,
49 bool anyZeroes) const
50 {
51 assert(locusRef.HasLength());
52
53 if(phaseInfo->NumMarkers() > 0)
54 {
55 long smallest = anyZeroes ? 0 : 1;
56 if(locusRef.HasOffset()) smallest = locusRef.GetOffset();
57
58 long largest = smallest + locusRef.GetLength() - 1;
59 if(smallest < 0 && largest >= 0 && (!anyZeroes)) largest++;
60
61 if(phaseInfo->Smallest() < smallest)
62 {
63 throw gc_phase_too_small( phaseInfo->Smallest(),
64 indName,
65 smallest,
66 locusRef.GetName());
67 }
68
69 if(phaseInfo->Largest() > largest)
70 {
71 throw gc_phase_too_large( phaseInfo->Largest(),
72 indName,
73 largest,
74 locusRef.GetName());
75 }
76 }
77
78 phaseInfo->CheckAgainstLocations(locusRef);
79
80 gcUnphasedMarkers * newMarkers = new gcUnphasedMarkers(*phaseInfo);
81 if(!anyZeroes)
82 {
83 newMarkers->ShiftNegsUp();
84 }
85 return newMarkers;
86 }
87
88 void
AddDefaultPhaseElem(TiXmlElement * individualElement) const89 GCDataStore::AddDefaultPhaseElem(TiXmlElement * individualElement) const
90 {
91 TiXmlElement * phaseElement = new TiXmlElement( xmlstr::XML_TAG_PHASE.c_str());
92 individualElement->LinkEndChild(phaseElement);
93 phaseElement->SetAttribute( xmlstr::XML_ATTRTYPE_TYPE.c_str(),
94 xmlstr::XML_ATTRVALUE_UNKNOWN.c_str());
95 TiXmlText * phaseText = new TiXmlText("");
96 phaseElement->LinkEndChild(phaseText);
97 }
98
99 void
AddPhaseElem(TiXmlElement * individualElem,const GCIndividual & indi,const gcLocus & locusRef) const100 GCDataStore::AddPhaseElem(TiXmlElement * individualElem, const GCIndividual & indi, const gcLocus & locusRef) const
101 // EWFIX.P3.BUG.537 -- FIXED ?? need to do this at the locus level as well ??
102 // EWFIX.P4 -- add a locus name attribute in a later release ??
103 {
104 const gcUnphasedMarkers * phaseInfo = indi.GetUnphased(locusRef);
105 if(phaseInfo == NULL || phaseInfo->NumMarkers() == 0)
106 {
107 if (locusRef.HasUnphasedMarkers())
108 {
109 phaseInfo = locusRef.GetUnphasedMarkers();
110 }
111 }
112
113 if(phaseInfo == NULL || phaseInfo->NumMarkers() == 0)
114 {
115 AddDefaultPhaseElem(individualElem);
116 }
117 else
118 {
119 // can throw
120 bool anyZeroes = GetStructures().AnyZeroes(); // EWFIX.P4 -- wasteful
121 gcUnphasedMarkers * newPhaseInfo = CheckPhaseMarkers(phaseInfo,
122 indi.GetName(),
123 locusRef,
124 anyZeroes);
125
126 TiXmlElement * phaseElement = new TiXmlElement( xmlstr::XML_TAG_PHASE.c_str());
127 individualElem->LinkEndChild(phaseElement);
128
129 phaseElement->SetAttribute( xmlstr::XML_ATTRTYPE_TYPE.c_str(),
130 xmlstr::XML_ATTRVALUE_UNKNOWN.c_str());
131
132 TiXmlText * phaseText = new TiXmlText(newPhaseInfo->AsString().c_str());
133 delete newPhaseInfo;
134 phaseElement->LinkEndChild(phaseText);
135 }
136 }
137
138 void
AddDataBlockElem(TiXmlElement * sampleElement,const GCSequentialData & seqData,const gcLocus & locusRef,size_t siteIndex) const139 GCDataStore::AddDataBlockElem(TiXmlElement * sampleElement, const GCSequentialData & seqData, const gcLocus & locusRef, size_t siteIndex) const
140 {
141 gcSpecificDataType dataType = locusRef.GetDataType();
142 if(dataType == sdatatype_NONE_SET)
143 {
144 wxString msg = wxString::Format(gcerr::missingDataTypeForLocus,locusRef.GetName().c_str());
145 gui_error g(msg.c_str());
146 throw g;
147 }
148
149 wxString dataTypeString = ToWxString(dataType);
150 wxString dataString = " "+seqData.GetData(siteIndex)+" ";
151
152 TiXmlElement * dataBlockElement = new TiXmlElement( xmlstr::XML_TAG_DATABLOCK.c_str());
153 sampleElement->LinkEndChild(dataBlockElement);
154 dataBlockElement->SetAttribute( xmlstr::XML_ATTRTYPE_TYPE.c_str(),
155 dataTypeString.c_str());
156
157 TiXmlText * data = new TiXmlText(dataString);
158 dataBlockElement->LinkEndChild(data);
159
160 }
161
162 void
AddDataBlockElem(TiXmlElement * sampleElem,const GCSequentialData & seqData,const gcLocus & locusRef) const163 GCDataStore::AddDataBlockElem(TiXmlElement * sampleElem, const GCSequentialData & seqData, const gcLocus & locusRef ) const
164 {
165 gcSpecificDataType dataType = locusRef.GetDataType();
166 if(dataType == sdatatype_NONE_SET)
167 {
168 wxString msg = wxString::Format(gcerr::missingDataTypeForLocus,locusRef.GetName().c_str());
169 gui_error g(msg.c_str());
170 throw g;
171 }
172
173 TiXmlElement * dataBlockElement = new TiXmlElement( xmlstr::XML_TAG_DATABLOCK.c_str());
174 sampleElem->LinkEndChild(dataBlockElement);
175
176 wxString dataTypeString = ToWxString(dataType);
177 dataBlockElement->SetAttribute( xmlstr::XML_ATTRTYPE_TYPE.c_str(),
178 dataTypeString.c_str());
179
180 wxString dataString = wxString::Format(" %s ",seqData.GetData().c_str());
181 TiXmlText * data = new TiXmlText(dataString);
182 dataBlockElement->LinkEndChild(data);
183
184 }
185
186 void
AddSampleElem(TiXmlElement * individualElem,const GCIndividual & individual,const gcRegion & regionRef,const gcLocus & locusRef,size_t siteIndex,size_t hapIndex) const187 GCDataStore::AddSampleElem(TiXmlElement * individualElem, const GCIndividual & individual, const gcRegion & regionRef, const gcLocus & locusRef, size_t siteIndex, size_t hapIndex) const
188 {
189
190 const gcSample * sample = individual.GetSample(hapIndex);
191 assert(!locusRef.GetLinked());
192 const GCSequentialData & data = sample->GetData(&locusRef);
193
194 TiXmlElement * sampleElement = new TiXmlElement( xmlstr::XML_TAG_SAMPLE.c_str());
195 individualElem->LinkEndChild(sampleElement);
196 sampleElement->SetAttribute(xmlstr::XML_ATTRTYPE_NAME.c_str(),
197 sample->GetLabel().c_str());
198
199 AddDataBlockElem(sampleElement,data,locusRef,siteIndex);
200 }
201
202 void
AddSampleElem(TiXmlElement * individualElem,const GCIndividual & individual,const gcRegion & regionRef,size_t hapIndex) const203 GCDataStore::AddSampleElem(TiXmlElement * individualElem, const GCIndividual & individual, const gcRegion & regionRef, size_t hapIndex) const
204 {
205 const gcSample * sample = individual.GetSample(hapIndex);
206
207 TiXmlElement * sampleElement = new TiXmlElement( xmlstr::XML_TAG_SAMPLE.c_str());
208 individualElem->LinkEndChild(sampleElement);
209 sampleElement->SetAttribute(xmlstr::XML_ATTRTYPE_NAME.c_str(),
210 sample->GetLabel().c_str());
211
212 constObjVector loci = m_structures.GetConstDisplayableLinkedLociInMapOrderFor(regionRef.GetId());
213 for(constObjVector::const_iterator i = loci.begin(); i != loci.end(); i++)
214 {
215 const gcLocus * locP = dynamic_cast<const gcLocus*>(*i);
216 assert(locP != NULL);
217
218 const gcLocus & locusRef = *locP;
219 if(locusRef.GetLinked())
220 {
221 try
222 {
223 const GCSequentialData & data = sample->GetData(locP);
224 AddDataBlockElem(sampleElement,data,locusRef);
225 }
226 catch (const gc_data_error& e)
227 // for gc_sample_missing_locus_data
228 {
229 // EWFIX.P3 -- bad formatting because string e.what()
230 // contains quote characters
231 wxString msg = wxString::Format(gcerr::inIndividual,
232 individual.GetName().c_str(),
233 e.what());
234 throw gc_data_error (msg.c_str());
235 }
236 }
237 }
238 }
239
240 void
AddGenoResolutionElem(TiXmlElement * individualElem,const gcPhenotype & pheno) const241 GCDataStore::AddGenoResolutionElem(TiXmlElement * individualElem, const gcPhenotype & pheno) const
242 {
243 size_t traitId = pheno.GetTraitId();
244 const gcTraitInfo & trait = GetStructures().GetTrait(traitId);
245 wxString traitName = trait.GetName();
246
247 TiXmlElement * genoResoElement = new TiXmlElement( xmlstr::XML_TAG_GENOTYPE_RESOLUTIONS.c_str());
248 individualElem->LinkEndChild(genoResoElement);
249
250 TiXmlElement * traitNameElement = new TiXmlElement( xmlstr::XML_TAG_TRAIT_NAME.c_str());
251 genoResoElement->LinkEndChild(traitNameElement);
252
253 TiXmlText * traitNameTextElement = new TiXmlText(traitName.c_str());
254 traitNameElement->LinkEndChild(traitNameTextElement);
255
256 const std::vector<gcHapProbability> & haps = pheno.GetHapProbabilities();
257 std::vector<gcHapProbability>::const_iterator i;
258 for(i = haps.begin(); i != haps.end(); i++)
259 {
260 const gcHapProbability & hapProb = *i;
261 double relProb = hapProb.GetPenetrance();
262 const gcIdVec & alleleIds = hapProb.GetAlleleIds();
263
264 TiXmlElement * hapElement = new TiXmlElement( xmlstr::XML_TAG_HAPLOTYPES.c_str());
265 genoResoElement->LinkEndChild(hapElement);
266
267 TiXmlElement * relProbElem = new TiXmlElement( xmlstr::XML_TAG_PENETRANCE.c_str());
268 hapElement->LinkEndChild(relProbElem);
269
270 TiXmlText * probText = new TiXmlText(wxString::Format("%f",relProb).c_str());
271 relProbElem->LinkEndChild(probText);
272
273 TiXmlElement * allelesElement = new TiXmlElement( xmlstr::XML_TAG_ALLELES.c_str());
274 hapElement->LinkEndChild(allelesElement);
275
276 wxString alleles = wxEmptyString;
277 for(size_t j=0; j < alleleIds.size(); j++)
278
279 {
280 if(j == 0)
281 {
282 alleles += " ";
283 }
284 const gcTraitAllele & traitAllele = GetStructures().GetAllele(alleleIds[j]);
285 alleles += traitAllele.GetName();
286 alleles += " ";
287 }
288
289 TiXmlText * alleleText = new TiXmlText(alleles.c_str());
290 allelesElement->LinkEndChild(alleleText);
291 }
292 }
293
294 void
AddIndividualElem(TiXmlElement * popElem,const GCIndividual & individual,const gcRegion & regionRef,const gcLocus & locusRef,size_t siteIndex) const295 GCDataStore::AddIndividualElem(TiXmlElement * popElem, const GCIndividual & individual, const gcRegion & regionRef, const gcLocus & locusRef, size_t siteIndex) const
296 {
297 wxString indName = individual.GetName();
298
299 TiXmlElement * individualElement = new TiXmlElement( xmlstr::XML_TAG_INDIVIDUAL.c_str());
300 popElem->LinkEndChild(individualElement);
301 individualElement->SetAttribute( xmlstr::XML_ATTRTYPE_NAME.c_str(),
302 indName.c_str());
303
304 // NOTE : we don't put phase info in because this is a single unlinked site
305 for(size_t hapIndex=0; hapIndex < individual.GetNumSamples(); hapIndex++)
306 {
307 AddSampleElem(individualElement,individual,regionRef,locusRef,siteIndex,hapIndex);
308 }
309 }
310
311 void
AddIndividualElem(TiXmlElement * popElem,const GCIndividual & individual,const gcRegion & regionRef) const312 GCDataStore::AddIndividualElem(TiXmlElement * popElem, const GCIndividual & individual, const gcRegion & regionRef) const
313 {
314 wxString indName = individual.GetName();
315
316 TiXmlElement * individualElement = new TiXmlElement( xmlstr::XML_TAG_INDIVIDUAL.c_str());
317 popElem->LinkEndChild(individualElement);
318 individualElement->SetAttribute( xmlstr::XML_ATTRTYPE_NAME.c_str(),
319 indName.c_str());
320
321 const gcIdSet & phenoIds = individual.GetPhenotypeIds();
322 for(gcIdSet::const_iterator i=phenoIds.begin(); i!=phenoIds.end(); i++)
323 {
324 const gcPhenotype & pheno = GetStructures().GetPhenotype(*i);
325 AddGenoResolutionElem(individualElement,pheno);
326 }
327
328 if(individual.GetNumSamples() > 1)
329 // to put in phase elem only
330 {
331 gcIdVec locIds = GetStructures().GetLocusIdsForRegionByMapPosition(regionRef.GetId());
332
333 for(gcIdVec::const_iterator i = locIds.begin(); i != locIds.end(); i++)
334 {
335 size_t locusId = (*i);
336 const gcLocus & locusRef = m_structures.GetLocus(locusId);
337 if(locusRef.GetLinked())
338 {
339 AddPhaseElem(individualElement,individual,locusRef);
340 }
341 }
342
343 }
344
345 for(size_t hapIndex=0; hapIndex < individual.GetNumSamples(); hapIndex++)
346 {
347 AddSampleElem(individualElement,individual,regionRef,hapIndex);
348 }
349 }
350
351 void
AddPanelElem(TiXmlElement * popElem,const gcPanel & panel) const352 GCDataStore::AddPanelElem(TiXmlElement * popElem, const gcPanel & panel) const
353 {
354 TiXmlElement * panelElement = new TiXmlElement( xmlstr::XML_TAG_PANEL.c_str());
355 popElem->LinkEndChild(panelElement);
356 panelElement->SetAttribute(xmlstr::XML_ATTRTYPE_NAME.c_str(), (panel.GetName()).c_str());
357
358 TiXmlElement * sizeElement = new TiXmlElement( xmlstr::XML_TAG_PANELSIZE.c_str());
359 panelElement->LinkEndChild(sizeElement);
360 wxString sizeString = wxString::Format(" %ld ",panel.GetNumPanels());
361 TiXmlText * sizeText = new TiXmlText(sizeString);
362 sizeElement->LinkEndChild(sizeText);
363 }
364
365 TiXmlElement *
MakeBlockElem(const gcLocus & locusRef,bool requireMapPosition) const366 GCDataStore::MakeBlockElem(const gcLocus & locusRef, bool requireMapPosition) const
367 {
368 TiXmlElement * blockElement = new TiXmlElement( xmlstr::XML_TAG_BLOCK.c_str());
369 blockElement->SetAttribute( xmlstr::XML_ATTRTYPE_NAME.c_str(),
370 locusRef.GetName().c_str());
371
372 if(locusRef.HasOffset())
373 {
374 wxString offsetString = wxString::Format(" %ld ",locusRef.GetOffset());
375
376 TiXmlElement * offsetElement = new TiXmlElement( xmlstr::XML_TAG_OFFSET.c_str());
377 TiXmlText * offsetText = new TiXmlText(offsetString);
378 offsetElement->LinkEndChild(offsetText);
379 blockElement->LinkEndChild(offsetElement);
380 }
381
382 long mapPosition = 1;
383 if(requireMapPosition && !locusRef.HasMapPosition())
384 {
385 wxString msg = wxString::Format(gcerr::locusWithoutMapPosition,
386 locusRef.GetName().c_str());
387 gui_error g(msg);
388 throw g;
389 }
390 if(locusRef.HasMapPosition())
391 {
392 mapPosition = locusRef.GetMapPosition();
393 }
394 wxString mapPosString = wxString::Format(" %ld ",mapPosition);
395 TiXmlText * mapPosText = new TiXmlText(mapPosString);
396 TiXmlElement * mapPositionElement = new TiXmlElement( xmlstr::XML_TAG_MAP_POSITION.c_str());
397 mapPositionElement->LinkEndChild(mapPosText);
398 blockElement->LinkEndChild(mapPositionElement);
399
400 TiXmlElement * lengthElement = new TiXmlElement( xmlstr::XML_TAG_LENGTH.c_str());
401 if(locusRef.HasTotalLength())
402 {
403 if( ! (locusRef.GetDataType() == sdatatype_DNA && locusRef.GetLength() == locusRef.GetNumMarkers()))
404 {
405 wxString lengthString = wxString::Format(" %ld ",(long)locusRef.GetLength());
406 TiXmlText * lengthText = new TiXmlText(lengthString);
407 lengthElement->LinkEndChild(lengthText);
408 blockElement->LinkEndChild(lengthElement);
409 }
410 }
411 else
412 {
413 if(locusRef.GetDataType() != sdatatype_DNA && locusRef.GetNumMarkers() > 1)
414 {
415 delete lengthElement;
416 wxString msg = wxString::Format(gcerr::missingLengthForLocus,locusRef.GetName().c_str());
417 gui_error g(msg.c_str());
418 throw g;
419 }
420 }
421
422 if(locusRef.HasLocations())
423 {
424 TiXmlElement * locationsElement = new TiXmlElement( xmlstr::XML_TAG_LOCATIONS.c_str());
425 wxString locationsString = locusRef.GetLocationsAsString();
426 TiXmlText * locationsText = new TiXmlText(locationsString);
427 locationsElement->LinkEndChild(locationsText);
428 blockElement->LinkEndChild(locationsElement);
429 }
430
431 return blockElement;
432 }
433
434 TiXmlElement *
MakeBlockElemWithMapPosition(const gcLocus & locusRef) const435 GCDataStore::MakeBlockElemWithMapPosition(const gcLocus & locusRef) const
436 {
437 return MakeBlockElem(locusRef,true);
438 }
439
440 TiXmlElement *
MakeBlockElemWithoutMapPosition(const gcLocus & locusRef) const441 GCDataStore::MakeBlockElemWithoutMapPosition(const gcLocus & locusRef) const
442 {
443 return MakeBlockElem(locusRef,false);
444 }
445
446 void
AddEffectivePopSizeElem(TiXmlElement * regionElem,const gcRegion & regionRef) const447 GCDataStore::AddEffectivePopSizeElem(TiXmlElement * regionElem, const gcRegion & regionRef) const
448 {
449 if(regionRef.HasEffectivePopulationSize())
450 {
451 double psize = regionRef.GetEffectivePopulationSize();
452 TiXmlElement * effPopSizeElem = new TiXmlElement( xmlstr::XML_TAG_EFFECTIVE_POPSIZE.c_str());
453 regionElem->LinkEndChild(effPopSizeElem);
454 TiXmlText * popSizeText = new TiXmlText(wxString::Format("%f",psize).c_str());
455 effPopSizeElem->LinkEndChild(popSizeText);
456 }
457 }
458
459 #if 0 // Potentially DEAD CODE (bobgian, Feb 2010)
460 void
461 GCDataStore::AddSpacingElemSingle(TiXmlElement * regionElem, const gcLocus & locusRef) const
462 {
463 TiXmlElement * spacingElement = new TiXmlElement( xmlstr::XML_TAG_SPACING.c_str());
464 regionElem->LinkEndChild(spacingElement);
465 TiXmlElement * blockElement = MakeBlockElemWithoutMapPosition(locusRef);
466 spacingElement->LinkEndChild(blockElement);
467 }
468 #endif
469
470 #if 0 // EWFIX.REMOVE
471 TiXmlElement *
472 GCDataStore::MakeSpacingElemMulti(const constObjVector & loci, const gcRegion & regionRef) const
473 {
474 // check that loci don't overlap
475 GetStructures().VerifyLocusSeparations(regionRef);
476
477 TiXmlElement * spacingElement = new TiXmlElement( xmlstr::XML_TAG_SPACING.c_str());
478
479 long nextAllowable = gcdefault::badMapPosition;
480 wxString lastLocusName;
481
482 for(constObjVector::const_iterator iter = loci.begin(); iter != loci.end(); iter++)
483 {
484 const gcLocus * locP = dynamic_cast<const gcLocus*>(*iter);
485 if(locP == NULL)
486 {
487 wxString msg = wxString::Format(gcerr::corruptedDisplayableLociInMapOrder);
488 gc_implementation_error g(msg.c_str());
489 throw g;
490 }
491
492 if(!locP->HasMapPosition())
493 {
494 wxString msg = wxString::Format(gcerr::locusWithoutMapPosition,
495 locP->GetName().c_str());
496 gui_error g(msg);
497 throw g;
498 }
499 if(!locP->HasTotalLength() && ! (locP->GetDataType() == sdatatype_DNA))
500 {
501 wxString msg = wxString::Format(gcerr::locusWithoutLength,
502 locP->GetName().c_str());
503 gui_error g(msg);
504 throw g;
505 }
506 long mapPosition = locP->GetMapPosition();
507 size_t length = locP->GetLength();
508
509 if(iter != loci.begin())
510 {
511 if(mapPosition < nextAllowable)
512 {
513 wxString msg = wxString::Format(gcerr::locusOverlap,
514 lastLocusName.c_str(),
515 nextAllowable-1,
516 locP->GetName().c_str(),
517 mapPosition);
518 gui_error g(msg);
519 throw g;
520 }
521 }
522
523 nextAllowable = mapPosition + length;
524 lastLocusName = locP->GetName();
525
526 TiXmlElement * blockElement = MakeBlockElemWithMapPosition(*locP);
527 spacingElement->LinkEndChild(blockElement);
528 }
529
530 return spacingElement;
531 }
532 #endif
533
534 void
AddSpacingElem(TiXmlElement * regionElem,const gcRegion & regionRef) const535 GCDataStore::AddSpacingElem(TiXmlElement * regionElem, const gcRegion & regionRef) const
536 {
537 constObjVector loci = GetStructures().GetConstDisplayableLinkedLociInMapOrderFor(regionRef.GetId());
538
539 bool needMapInfo = (loci.size() > 1);
540
541 bool anyZeroes = GetStructures().AnyZeroes(); // EWFIX.P4 -- wasteful
542
543 TiXmlElement * spacingElement = new TiXmlElement( xmlstr::XML_TAG_SPACING.c_str());
544 regionElem->LinkEndChild(spacingElement);
545
546 bool havePrevious = false;
547 long lastExtent = 0;
548 size_t lastLocusId = 0;
549
550 for(constObjVector::const_iterator i=loci.begin(); i != loci.end(); i++)
551 {
552 const gcLocus * locusP = dynamic_cast<const gcLocus *>(*i);
553 assert(locusP != NULL);
554
555 assert(locusP->GetLinked());
556
557 TiXmlElement * blockElement = new TiXmlElement( xmlstr::XML_TAG_BLOCK.c_str());
558 blockElement->SetAttribute( xmlstr::XML_ATTRTYPE_NAME.c_str(),
559 locusP->GetName().c_str());
560
561 long start = gcdefault::badMapPosition;
562 //////////////////////////////////////////////////////
563 // map position
564 if(! locusP->HasMapPosition())
565 {
566 if(needMapInfo)
567 {
568 // EWFIX.P3 -- should use specific error
569 throw gc_locus_err(wxString::Format(gcerr::locusWithoutMapPosition,
570 locusP->GetName().c_str()));
571 }
572 }
573 else
574 {
575 start = locusP->GetMapPosition();
576 if(!anyZeroes && (start < 0))
577 {
578 start++;
579 }
580 TiXmlElement * mapPositionElement = new TiXmlElement( xmlstr::XML_TAG_MAP_POSITION.c_str());
581 wxString mapPosString = wxString::Format(" %ld ",start);
582 TiXmlText * mapPosText = new TiXmlText(mapPosString);
583 mapPositionElement->LinkEndChild(mapPosText);
584 blockElement->LinkEndChild(mapPositionElement);
585 }
586
587 //////////////////////////////////////////////////////
588 // length
589 if(locusP->HasTotalLength())
590 {
591 TiXmlElement * lengthElement = new TiXmlElement( xmlstr::XML_TAG_LENGTH.c_str());
592 wxString lengthString = wxString::Format(" %ld ",(long)locusP->GetTotalLength());
593 TiXmlText * lengthText = new TiXmlText(lengthString);
594 lengthElement->LinkEndChild(lengthText);
595 blockElement->LinkEndChild(lengthElement);
596 }
597
598 if(havePrevious)
599 {
600 if(start <= lastExtent)
601 {
602 long end = locusP->GetMapPosition()+locusP->GetLength()-1;
603 long lastStart = GetStructures().GetLocus(lastLocusId).GetMapPosition();
604 throw gc_locus_overlap(locusP->GetName(),start,end,
605 GetStructures().GetLocus(lastLocusId).GetName(),lastStart,lastExtent);
606 }
607 }
608
609 if(needMapInfo)
610 {
611 havePrevious = true;
612 size_t length = locusP->GetLength();
613 long stop = start + length;
614 stop -= 1; // EWFIX explain why
615 lastExtent = stop;
616 lastLocusId = locusP->GetId();
617 }
618
619 //////////////////////////////////////////////////////
620 // offset
621 long offset = 1;
622 if(locusP->HasOffset())
623 {
624 offset = locusP->GetOffset();
625
626 if(!anyZeroes && (offset < 1))
627 {
628 offset++;
629 }
630 start += offset;
631 TiXmlElement * offsetElement = new TiXmlElement( xmlstr::XML_TAG_OFFSET.c_str());
632 wxString offsetString = wxString::Format(" %ld ",offset);
633 TiXmlText * offsetText = new TiXmlText(offsetString);
634 offsetElement->LinkEndChild(offsetText);
635 blockElement->LinkEndChild(offsetElement);
636 }
637 else
638 {
639 if(locusP->GetDataType() == sdatatype_SNP && locusP->HasLocations())
640 {
641 // EWFIX.P3 -- should use specific error
642 throw gc_locus_err(wxString::Format(gcerr_locus::offsetMissingSnpLocations,locusP->GetName().c_str()));
643 }
644
645 if(needMapInfo)
646 {
647 bool lengthIsMarkers = false;
648 if(locusP->HasNumMarkers())
649 {
650 if(locusP->HasLength())
651 {
652 lengthIsMarkers = (locusP->GetNumMarkers() == locusP->GetLength());
653 }
654 else
655 {
656 lengthIsMarkers = (locusP->GetNumMarkers() == 1);
657 }
658 }
659 if(!lengthIsMarkers)
660 {
661 throw gc_locus_err(wxString::Format(gcerr_locus::offsetMissingMultiSegment,locusP->GetName().c_str()));
662 }
663 }
664
665 }
666
667 //////////////////////////////////////////////////////
668 // locations
669 if(locusP->HasLocations())
670 {
671 std::vector<long> locations = locusP->GetLocations();
672 wxString locationString = " ";
673
674 if(locusP->HasNumMarkers())
675 {
676 size_t numLocs = locations.size();
677 size_t numMarkers = locusP->GetNumMarkers();
678 if(numLocs != numMarkers)
679 {
680 throw gc_wrong_location_count( (long)numLocs,
681 (long)numMarkers,
682 locusP->GetName());
683 }
684 }
685
686 for(size_t i = 0; i < locations.size(); i++)
687 {
688 long loc = locations[i];
689
690 // EWFIX.P3 -- mouse cut and paste from similar
691 // calculations in phase item
692 long smallest = anyZeroes ? 0 : 1;
693 if(locusP->HasOffset()) smallest = locusP->GetOffset();
694
695 long largest = smallest + locusP->GetLength() - 1;
696 if(smallest < 0 && largest >= 0 && (!anyZeroes)) largest++;
697
698 if(loc < smallest)
699 {
700 throw gc_location_too_small(loc,
701 smallest,
702 locusP->GetName());
703 }
704
705 if(loc > largest)
706 {
707 throw gc_location_too_large(loc,
708 largest,
709 locusP->GetName());
710 }
711
712 if(!anyZeroes && loc < 1)
713 {
714 loc++;
715 }
716 locationString += wxString::Format("%ld ",loc);
717 }
718
719 TiXmlElement * locationElement = new TiXmlElement( xmlstr::XML_TAG_LOCATIONS.c_str());
720 TiXmlText * locationText = new TiXmlText(locationString);
721 locationElement->LinkEndChild(locationText);
722 blockElement->LinkEndChild(locationElement);
723 }
724 else
725 // when num markers > 1
726 // dna -- required if length > markers
727 // snp -- required for recom
728 // msat/kallele -- required for recom
729 {
730 if(locusP->GetNumMarkers() > 1)
731 {
732 if(locusP->GetDataType() == sdatatype_DNA)
733 {
734 if(locusP->HasTotalLength())
735 {
736 if(locusP->GetTotalLength() > locusP->GetNumMarkers())
737 {
738 throw gc_locus_err(wxString::Format(gcerr_locus::dnaBigLengthNeedsLocations,locusP->GetName().c_str()));
739 }
740 }
741 }
742 else
743 {
744 bool continueOn = guiQuestionBatchLog(wxString::Format(gcstr::locationsForRecom,locusP->GetName().c_str()),gcstr::abandonExport,gcstr::continueExport);
745 if(!continueOn)
746 {
747 throw gc_abandon_export();
748 }
749 }
750 }
751 }
752
753 spacingElement->LinkEndChild(blockElement);
754 }
755 }
756
757 void
AddTraitsElem(TiXmlElement * regionElem,const GCTraitInfoSet & traits) const758 GCDataStore::AddTraitsElem(TiXmlElement * regionElem, const GCTraitInfoSet & traits) const
759 {
760 if(!traits.empty())
761 {
762 TiXmlElement * traitsElement = new TiXmlElement( xmlstr::XML_TAG_TRAITS.c_str());
763 regionElem->LinkEndChild(traitsElement);
764
765 for(GCTraitInfoSet::const_iterator i=traits.begin(); i!=traits.end(); i++)
766 {
767 const gcTraitInfo & traitInfo = m_structures.GetTrait(*i);
768 TiXmlText * name = new TiXmlText(traitInfo.GetName());
769
770 TiXmlElement * nameElement = new TiXmlElement( xmlstr::XML_TAG_NAME.c_str());
771 nameElement->LinkEndChild(name);
772
773 TiXmlElement * traitElement = new TiXmlElement( xmlstr::XML_TAG_TRAIT.c_str());
774 traitElement->LinkEndChild(nameElement);
775
776 traitsElement->LinkEndChild(traitElement);
777 }
778 }
779 }
780
781 TiXmlElement *
MakePopulationElemForUnlinkedLocus(const gcNameResolvedInfo & info,const gcPopulation & popRef,const gcRegion & regionRef,const gcLocus & locusRef,size_t siteIndex) const782 GCDataStore::MakePopulationElemForUnlinkedLocus(const gcNameResolvedInfo & info, const gcPopulation & popRef, const gcRegion & regionRef, const gcLocus & locusRef, size_t siteIndex) const
783 {
784 TiXmlElement * popElement = new TiXmlElement( xmlstr::XML_TAG_POPULATION.c_str());
785 popElement->SetAttribute( xmlstr::XML_ATTRTYPE_NAME.c_str(),
786 popRef.GetName().c_str());
787
788 std::vector<const GCIndividual*> individuals = info.GetIndividuals();
789 for(size_t individualIndex=0; individualIndex < individuals.size(); individualIndex++)
790 {
791 assert(individuals[individualIndex] != NULL);
792 const GCIndividual & individual = *(individuals[individualIndex]);
793 AddIndividualElem(popElement,individual,regionRef,locusRef,siteIndex);
794 }
795
796 return popElement;
797 }
798
799 void
AddPopulationElemForRegion(TiXmlElement * regionElem,const gcNameResolvedInfo & info,const gcRegion & regionRef,const gcPopulation & popRef) const800 GCDataStore::AddPopulationElemForRegion(TiXmlElement * regionElem, const gcNameResolvedInfo & info, const gcRegion & regionRef, const gcPopulation & popRef) const
801 {
802 TiXmlElement * popElement = new TiXmlElement( xmlstr::XML_TAG_POPULATION.c_str());
803 regionElem->LinkEndChild(popElement);
804 popElement->SetAttribute( xmlstr::XML_ATTRTYPE_NAME.c_str(),
805 popRef.GetName().c_str());
806
807 std::vector<const GCIndividual*> individuals = info.GetIndividuals();
808 for(size_t index=0; index < individuals.size(); index++)
809 {
810 assert(individuals[index] != NULL);
811 const GCIndividual & individual = *(individuals[index]);
812 AddIndividualElem(popElement,individual,info.GetRegionRef());
813 }
814
815 if (m_structures.GetPanelsState())
816 {
817 if (m_structures.HasPanel(regionRef.GetId(), popRef.GetId()))
818 {
819 const gcPanel &panelRef = m_structures.GetPanel(regionRef.GetId(), popRef.GetId());
820 if (panelRef.GetBlessed() && (panelRef.GetNumPanels() > 0))
821 {
822 // only add > 0 panel counts
823 AddPanelElem(popElement, panelRef);
824 }
825 }
826 }
827 }
828
829 void
AddRegionElem(TiXmlElement * data,const gcExportable & expo,const gcRegion & regionRef) const830 GCDataStore::AddRegionElem(TiXmlElement * data, const gcExportable & expo, const gcRegion & regionRef) const
831 {
832 // create the named region element
833 TiXmlElement * regionElement = new TiXmlElement( xmlstr::XML_TAG_REGION.c_str());
834 data->LinkEndChild(regionElement);
835 regionElement->SetAttribute( xmlstr::XML_ATTRTYPE_NAME.c_str(),
836 regionRef.GetName().c_str());
837
838 AddEffectivePopSizeElem(regionElement,regionRef);
839 AddSpacingElem(regionElement,regionRef);
840 AddTraitsElem(regionElement,regionRef.GetTraitInfoSet());
841
842 #if 0 // Potentially DEAD CODE (bobgian, Feb 2010)
843 TiXmlElement * effPopSizeElem = MakeEffectivePopSizeElem(regionRef);
844 if(effPopSizeElem != NULL)
845 {
846 regionElement->LinkEndChild(effPopSizeElem);
847 }
848
849 TiXmlElement * spacingElement = MakeSpacingElem(regionRef);
850 regionElement->LinkEndChild(spacingElement);
851
852 const GCTraitInfoSet & traits = regionRef.GetTraitInfoSet();
853 if(!traits.empty())
854 {
855 TiXmlElement * traitsElement = MakeTraitsElem(traits);
856 regionElement->LinkEndChild(traitsElement);
857 }
858 #endif
859
860 // write out info for each population
861 constObjVector pops = m_structures.GetConstDisplayablePops();
862 for(constObjVector::const_iterator iter = pops.begin(); iter != pops.end(); iter++)
863 {
864 const gcPopulation * popP = dynamic_cast<const gcPopulation*>(*iter);
865 assert(popP != NULL);
866 const gcNameResolvedInfo & info = expo.GetInfo(*popP,regionRef);
867 AddPopulationElemForRegion(regionElement,info,regionRef,*popP);
868 }
869 }
870
871 void
AddDivergenceElem(TiXmlElement * forces) const872 GCDataStore::AddDivergenceElem(TiXmlElement * forces) const
873 {
874 wxLogVerbose("In AddDivergenceElem"); // JMDBG
875 // create divergence element
876 TiXmlElement * divergence = new TiXmlElement( xmlstr::XML_TAG_DIVERGENCE.c_str());
877 forces->LinkEndChild(divergence);
878
879 // boilerplate
880 TiXmlElement * prior = new TiXmlElement( xmlstr::XML_TAG_PRIOR.c_str());
881 prior->SetAttribute(xmlstr::XML_ATTRTYPE_TYPE.c_str(), "linear");
882 divergence->LinkEndChild(prior);
883
884 TiXmlElement * paramindex = new TiXmlElement( xmlstr::XML_TAG_PARAMINDEX.c_str());
885 wxString dataString = " default ";
886 TiXmlText * data = new TiXmlText(dataString);
887 paramindex->LinkEndChild(data);
888 prior->LinkEndChild(paramindex);
889
890 TiXmlElement * lower = new TiXmlElement( xmlstr::XML_TAG_PRIORLOWERBOUND.c_str());
891 dataString = " 0.0 ";
892 data = new TiXmlText(dataString);
893 lower->LinkEndChild(data);
894 prior->LinkEndChild(lower);
895
896 TiXmlElement * upper = new TiXmlElement( xmlstr::XML_TAG_PRIORUPPERBOUND.c_str());
897 dataString = " 0.01 ";
898 data = new TiXmlText(dataString);
899 upper->LinkEndChild(data);
900 prior->LinkEndChild(upper);
901 wxLogVerbose(" boilerplate out"); // JMDBG
902
903 // variable values
904 gcDisplayOrder parids = m_structures.GetParentIds();
905
906 TiXmlElement * method = new TiXmlElement( xmlstr::XML_TAG_METHOD.c_str());
907 dataString = " ";
908 for(gcDisplayOrder::const_iterator iter = parids.begin(); iter != parids.end(); iter++)
909 {
910 dataString += " USER ";
911 }
912 data = new TiXmlText(dataString);
913 method->LinkEndChild(data);
914 divergence->LinkEndChild(method);
915
916 TiXmlElement * startval = new TiXmlElement( xmlstr::XML_TAG_START_VALUES.c_str());
917 float sval = 0;
918 dataString = " ";
919 for(gcDisplayOrder::const_iterator iter = parids.begin(); iter != parids.end(); iter++)
920 {
921 sval += 0.002;
922 dataString += wxString::Format(_T(" %f "),sval);
923 }
924 data = new TiXmlText(dataString);
925 startval->LinkEndChild(data);
926 divergence->LinkEndChild(startval);
927 wxLogVerbose(" start values out"); // JMDBG
928
929 TiXmlElement * poptree = new TiXmlElement( xmlstr::XML_TAG_POPTREE.c_str());
930 divergence->LinkEndChild(poptree);
931
932 for(gcDisplayOrder::const_iterator iter = parids.begin(); iter != parids.end(); iter++)
933 {
934 TiXmlElement * epochb = new TiXmlElement( xmlstr::XML_TAG_EPOCH_BOUNDARY.c_str());
935
936 TiXmlElement * newpop = new TiXmlElement( xmlstr::XML_TAG_NEWPOP.c_str());
937 const gcParent & parent = m_structures.GetParent(*iter);
938 wxLogVerbose(" parentid: %i child1id: %i child2id: %i",(int)*iter, (int)parent.GetChild1Id(), (int)parent.GetChild2Id()); // JMDBG
939
940 dataString = " ";
941 if (m_structures.IsPop(parent.GetChild1Id()))
942 {
943 dataString += m_structures.GetPop(parent.GetChild1Id()).GetName().c_str();
944 }
945 else
946 {
947 dataString += m_structures.GetParent(parent.GetChild1Id()).GetName().c_str();
948 }
949
950 dataString += " ";
951 if (m_structures.IsPop(parent.GetChild2Id()))
952 {
953 dataString += m_structures.GetPop(parent.GetChild2Id()).GetName().c_str();
954 }
955 else
956 {
957 dataString += m_structures.GetParent(parent.GetChild2Id()).GetName().c_str();
958 }
959 dataString += " ";
960
961 data = new TiXmlText(dataString);
962 newpop->LinkEndChild(data);
963 epochb->LinkEndChild(newpop);
964
965 TiXmlElement * ancestor = new TiXmlElement( xmlstr::XML_TAG_ANCESTOR.c_str());
966 dataString = " ";
967 dataString += parent.GetName().c_str();
968 dataString += " ";
969 data = new TiXmlText(dataString);
970 ancestor->LinkEndChild(data);
971 epochb->LinkEndChild(ancestor);
972
973 poptree->LinkEndChild(epochb);
974 }
975 wxLogVerbose(" done"); // JMDBG
976 }
977
978 void
AddMigrationElem(TiXmlElement * forces)979 GCDataStore::AddMigrationElem(TiXmlElement * forces)
980 {
981 wxLogVerbose("In AddMigrationElem"); // JMDBG
982 //wxLogMessage("In AddMigrationElem"); // JMDBG
983 //cout << "In AddMigrationElem" << endl;
984
985
986 // create migration element
987 TiXmlElement * mig;
988 if (m_structures.GetDivMigMatrixDefined())
989 {
990 wxLogVerbose("Divergence Migration Matrix being output"); // JMDBG
991 //cout << "Divergence Migration Matrix being output" << endl;
992 mig = new TiXmlElement( xmlstr::XML_TAG_DIVMIG.c_str());
993 }
994 else if (m_structures.GetMigMatrixDefined())
995 {
996 wxLogVerbose("Migration Matrix being output"); // JMDBG
997 //cout << "Migration Matrix being output" << endl;
998 mig = new TiXmlElement( xmlstr::XML_TAG_MIGRATION.c_str());
999 }
1000 else
1001 {
1002 // neither a migration or divergence migration matrix exist (batch converter case)
1003 // so create the default migration matrix
1004 wxLogVerbose("No Migration Matrix exists so creating it"); // JMDBG
1005 //cout << "No Migration Matrix exists so creating it" << endl;
1006
1007 if ( m_structures.GetDivergenceState())
1008 {
1009 mig = new TiXmlElement( xmlstr::XML_TAG_DIVMIG.c_str());
1010 //cout << "Divergence Migration Matrix being output" << endl;
1011 }
1012 else
1013 {
1014 mig = new TiXmlElement( xmlstr::XML_TAG_MIGRATION.c_str());
1015 //cout << "Migration Matrix being output" << endl;
1016 }
1017 m_structures.MakeMigrationMatrix();
1018 /*
1019 gcDisplayOrder popids = m_structures.GetDisplayablePopIds();
1020
1021 for(gcDisplayOrder::const_iterator iter = popids.begin(); iter != popids.end(); iter++)
1022 {
1023 for(gcDisplayOrder::const_iterator jter = popids.begin(); jter != popids.end(); jter++)
1024 {
1025 if (*jter!=*iter)
1026 {
1027 gcMigration Mig1 = m_structures.MakeMigration(true, *iter, *jter );
1028 gcMigration Mig2 = m_structures.MakeMigration(true, *jter, *iter );
1029 }
1030 }
1031 }
1032 */
1033 }
1034 forces->LinkEndChild(mig);
1035
1036 wxString startString = " ";
1037 wxString methodString = " ";
1038 wxString profilesString = " ";
1039 wxString constraintsString = " ";
1040
1041 constObjVector popsToDisplay = m_structures.GetConstDisplayablePops();
1042 if (popsToDisplay.size() > 0)
1043 {
1044
1045 // build full migration matrix data in a row by row manner
1046 const gcDisplayOrder popids = m_structures.GetDisplayablePopIds();
1047 const gcDisplayOrder parids = m_structures.GetParentIds();
1048
1049 // populations first
1050 for(gcDisplayOrder::const_iterator iter = popids.begin(); iter != popids.end(); iter++)
1051 {
1052 for(gcDisplayOrder::const_iterator jter = popids.begin(); jter != popids.end(); jter++)
1053 {
1054 if(m_structures.HasMigration(*iter, *jter))
1055 {
1056 // population / population pairs
1057 const gcMigration mig = m_structures.GetMigration(*iter, *jter);
1058 startString += mig.GetStartValueString();
1059 methodString += mig.GetMethodString();
1060 profilesString += mig.GetProfileAsString();
1061 constraintsString += mig.GetConstraintString();
1062 }
1063 else
1064 {
1065 // doesn't exist so not allowed
1066 startString += "0";
1067 methodString += gcstr_mig::migmethodUser;
1068 profilesString += gcstr_mig::migprofileNone ;
1069 constraintsString += gcstr_mig::migconstraintInvalid;
1070 }
1071 startString += " ";
1072 methodString += " ";
1073 profilesString += " ";
1074 constraintsString += " ";
1075 }
1076
1077 if (parids.size() > 0)
1078 {
1079 // population / parent pairs
1080 for(gcDisplayOrder::const_iterator kter = parids.begin(); kter != parids.end(); kter++)
1081 {
1082 if(m_structures.HasMigration(*iter, *kter))
1083 {
1084 // migration exists, get values
1085 const gcMigration mig = m_structures.GetMigration(*iter, *kter);
1086 startString += mig.GetStartValueString();
1087 methodString += mig.GetMethodString();
1088 profilesString += mig.GetProfileAsString();
1089 constraintsString += mig.GetConstraintString();
1090 }
1091 else
1092 {
1093 // doesn't exist so not allowed
1094 startString += "0";
1095 methodString += gcstr_mig::migmethodUser;
1096 profilesString += gcstr_mig::migprofileNone ;
1097 constraintsString += gcstr_mig::migconstraintInvalid;
1098 }
1099 startString += " ";
1100 methodString += " ";
1101 profilesString += " ";
1102 constraintsString += " ";
1103 }
1104 }
1105
1106 // spacers
1107 }
1108
1109 // now parents
1110 if (parids.size() > 0)
1111 {
1112 for(gcDisplayOrder::const_iterator iter = parids.begin(); iter != parids.end(); iter++)
1113 {
1114 for(gcDisplayOrder::const_iterator jter = popids.begin(); jter != popids.end(); jter++)
1115 {
1116 if(m_structures.HasMigration(*iter, *jter))
1117 {
1118 // parent / population pairs
1119 const gcMigration mig = m_structures.GetMigration(*iter, *jter);
1120 startString += mig.GetStartValueString();
1121 methodString += mig.GetMethodString();
1122 profilesString += mig.GetProfileAsString();
1123 constraintsString += mig.GetConstraintString();
1124 }
1125 else
1126 {
1127 // doesn't exist so not allowed
1128 startString += "0";
1129 methodString += gcstr_mig::migmethodUser;
1130 profilesString += gcstr_mig::migprofileNone ;
1131 constraintsString += gcstr_mig::migconstraintInvalid;
1132 }
1133 startString += " ";
1134 methodString += " ";
1135 profilesString += " ";
1136 constraintsString += " ";
1137 }
1138 // parent / parent pairs
1139 for(gcDisplayOrder::const_iterator kter = parids.begin(); kter != parids.end(); kter++)
1140 {
1141 if(m_structures.HasMigration(*iter, *kter))
1142 {
1143 // migration exists, get values
1144 const gcMigration mig = m_structures.GetMigration(*iter, *kter);
1145 startString += mig.GetStartValueString();
1146 methodString += mig.GetMethodString();
1147 profilesString += mig.GetProfileAsString();
1148 constraintsString += mig.GetConstraintString();
1149 }
1150 else
1151 {
1152 // doesn't exist so not allowed
1153 startString += "0";
1154 methodString += gcstr_mig::migmethodUser;
1155 profilesString += gcstr_mig::migprofileNone ;
1156 constraintsString += gcstr_mig::migconstraintInvalid;
1157 }
1158 startString += " ";
1159 methodString += " ";
1160 profilesString += " ";
1161 constraintsString += " ";
1162 }
1163 }
1164 }
1165 }
1166
1167 TiXmlElement * stvals = new TiXmlElement( xmlstr::XML_TAG_START_VALUES.c_str());
1168 TiXmlText * data = new TiXmlText(startString);
1169 stvals->LinkEndChild(data);
1170 mig->LinkEndChild(stvals);
1171
1172 TiXmlElement * method = new TiXmlElement( xmlstr::XML_TAG_METHOD.c_str());
1173 data = new TiXmlText(methodString);
1174 method->LinkEndChild(data);
1175 mig->LinkEndChild(method);
1176
1177 TiXmlElement * maxevts = new TiXmlElement( xmlstr::XML_TAG_MAX_EVENTS.c_str());
1178 wxString dataString = " 10000 ";
1179 data = new TiXmlText(dataString);
1180 maxevts->LinkEndChild(data);
1181 mig->LinkEndChild(maxevts);
1182
1183 TiXmlElement * profiles = new TiXmlElement( xmlstr::XML_TAG_PROFILES.c_str());
1184 data = new TiXmlText(profilesString);
1185 profiles->LinkEndChild(data);
1186 mig->LinkEndChild(profiles);
1187
1188 TiXmlElement * constraints = new TiXmlElement( xmlstr::XML_TAG_CONSTRAINTS.c_str());
1189 data = new TiXmlText(constraintsString);
1190 constraints->LinkEndChild(data);
1191 mig->LinkEndChild(constraints);
1192
1193 TiXmlElement * prior = new TiXmlElement( xmlstr::XML_TAG_PRIOR.c_str());
1194 prior->SetAttribute(xmlstr::XML_ATTRTYPE_TYPE.c_str(), "linear");
1195 mig->LinkEndChild(prior);
1196
1197 TiXmlElement * paramindex = new TiXmlElement( xmlstr::XML_TAG_PARAMINDEX.c_str());
1198 dataString = " default ";
1199 data = new TiXmlText(dataString);
1200 paramindex->LinkEndChild(data);
1201 prior->LinkEndChild(paramindex);
1202
1203 TiXmlElement * lower = new TiXmlElement( xmlstr::XML_TAG_PRIORLOWERBOUND.c_str());
1204 dataString = " 0.0 ";
1205 data = new TiXmlText(dataString);
1206 lower->LinkEndChild(data);
1207 prior->LinkEndChild(lower);
1208
1209 TiXmlElement * upper = new TiXmlElement( xmlstr::XML_TAG_PRIORUPPERBOUND.c_str());
1210 dataString = " 100.0 ";
1211 data = new TiXmlText(dataString);
1212 upper->LinkEndChild(data);
1213 prior->LinkEndChild(upper);
1214 wxLogVerbose(" xml out"); // JMDBG
1215 }
1216
1217 wxString
MakeUnlinkedName(wxString regionName,size_t index) const1218 GCDataStore::MakeUnlinkedName(wxString regionName, size_t index) const
1219 {
1220 return wxString::Format(gcstr::regionNameUnlinked,regionName.c_str(),(int)index);
1221 }
1222
1223 TiXmlElement *
MakeUnlinkedSite(const gcExportable & expo,const gcRegion & regionRef,const gcLocus & locus,size_t index) const1224 GCDataStore::MakeUnlinkedSite(const gcExportable & expo, const gcRegion & regionRef, const gcLocus & locus, size_t index) const
1225 {
1226 TiXmlElement * regionElement = new TiXmlElement( xmlstr::XML_TAG_REGION.c_str());
1227 regionElement->SetAttribute( xmlstr::XML_ATTRTYPE_NAME.c_str(),
1228 MakeUnlinkedName(regionRef.GetName(),index).c_str());
1229
1230 // EWFIX.P3 -- need to fix spacing info ??
1231
1232 constObjVector pops = m_structures.GetConstDisplayablePops();
1233 for(constObjVector::const_iterator iter=pops.begin(); iter != pops.end(); iter++)
1234 {
1235 const gcPopulation * popP = dynamic_cast<const gcPopulation*>(*iter);
1236 assert(popP != NULL);
1237
1238 const gcNameResolvedInfo & info = expo.GetInfo(*popP,regionRef);
1239 TiXmlElement * popElement =
1240 MakePopulationElemForUnlinkedLocus(info,*popP,regionRef,locus,index);
1241 if(popElement != NULL)
1242 {
1243 regionElement->LinkEndChild(popElement);
1244 }
1245 }
1246 return regionElement;
1247 }
1248
1249 std::vector<TiXmlElement*>
MakeUnlinkedRegionElems(const gcExportable & expo,const gcRegion & regionRef) const1250 GCDataStore::MakeUnlinkedRegionElems(const gcExportable & expo, const gcRegion & regionRef) const
1251 {
1252 std::vector<TiXmlElement*> elements;
1253
1254 // make sure that we are accessing these by order of creation -- a good choice for
1255 // batch converter and for loci within a single input file. Less clear how good
1256 // this is if users have been munging about in the gui
1257 gcIdVec locIds = GetStructures().GetLocusIdsForRegionByCreation(regionRef.GetId());
1258
1259 for(gcIdVec::const_iterator i = locIds.begin(); i != locIds.end(); i++)
1260 {
1261 size_t locusId = *i;
1262 const gcLocus & locus = m_structures.GetLocus(locusId);
1263 if(!locus.GetLinked())
1264 {
1265 for(size_t index=0; index < locus.GetNumMarkers(); index++)
1266 {
1267 TiXmlElement * unlinkedElem = MakeUnlinkedSite(expo,regionRef,locus,index);
1268 elements.push_back(unlinkedElem);
1269 }
1270 }
1271 }
1272
1273 return elements;
1274 }
1275
1276
1277 TiXmlDocument *
ExportFile()1278 GCDataStore::ExportFile()
1279 {
1280 TiXmlDocument * docP = new TiXmlDocument();
1281 try
1282 {
1283 TiXmlDeclaration * decl = new TiXmlDeclaration( "1.0", "", "" );
1284 docP->LinkEndChild( decl );
1285
1286 if(!(m_commentString.IsEmpty()))
1287 {
1288 TiXmlComment * comment = new TiXmlComment();
1289 comment->SetValue(m_commentString.c_str());
1290 docP->LinkEndChild(comment);
1291 }
1292
1293 TiXmlElement * lamarc = new TiXmlElement( xmlstr::XML_TAG_LAMARC.c_str() );
1294 docP->LinkEndChild( lamarc );
1295 lamarc->SetAttribute( xmlstr::XML_ATTRTYPE_VERSION.c_str(),VERSION);
1296
1297 TiXmlElement * format = new TiXmlElement( xmlstr::XML_TAG_FORMAT.c_str() );
1298 lamarc->LinkEndChild( format );
1299 TiXmlElement * coordMunge = new TiXmlElement( xmlstr::XML_TAG_CONVERT_OUTPUT.c_str() );
1300 format->LinkEndChild( coordMunge );
1301 bool anyZeroes = GetStructures().AnyZeroes(); // EWFIX.P4 -- wasteful
1302 TiXmlText * doConvertText = new TiXmlText(anyZeroes ? "false" : "true");
1303 coordMunge->LinkEndChild(doConvertText);
1304
1305 // add migration if more than one population
1306 if (m_structures.GetPopCount() > 1)
1307 {
1308 TiXmlElement * forces = new TiXmlElement( xmlstr::XML_TAG_FORCES.c_str() );
1309 lamarc->LinkEndChild( forces );
1310 AddMigrationElem(forces);
1311
1312 if (m_structures.GetDivergenceState())
1313 {
1314 AddDivergenceElem(forces);
1315 }
1316 }
1317
1318 TiXmlElement * data = new TiXmlElement( xmlstr::XML_TAG_DATA.c_str() );
1319 lamarc->LinkEndChild( data );
1320
1321 const gcExportable exportable = BuildExportableData();
1322
1323 // do all regions that contain linked data
1324 constObjVector regions = m_structures.GetConstDisplayableRegions();
1325 for(constObjVector::const_iterator gIter = regions.begin();
1326 gIter != regions.end(); gIter++)
1327 {
1328 const gcRegion * regionP = dynamic_cast<const gcRegion*>(*gIter);
1329 assert(regionP != NULL);
1330 if(m_structures.RegionHasAnyLinkedLoci(regionP->GetId()))
1331 {
1332 AddRegionElem(data,exportable,*regionP);
1333 }
1334 }
1335
1336 // do all regions that contain unlinked data
1337 for(constObjVector::const_iterator gIter = regions.begin();
1338 gIter != regions.end(); gIter++)
1339 {
1340 const gcRegion * regionP = dynamic_cast<const gcRegion*>(*gIter);
1341 assert(regionP != NULL);
1342 if(m_structures.RegionHasAnyUnLinkedLoci(regionP->GetId()))
1343 {
1344 std::vector<TiXmlElement *> regionElems = MakeUnlinkedRegionElems(exportable,*regionP);
1345 for(std::vector<TiXmlElement*>::iterator i=regionElems.begin(); i != regionElems.end(); i++)
1346 {
1347 data->LinkEndChild( *i );
1348 }
1349 }
1350 }
1351 }
1352 catch(const gc_ex & e)
1353 {
1354 delete docP;
1355 throw;
1356 }
1357
1358 return docP;
1359 }
1360
1361 void
WriteExportedData(TiXmlDocument * docP)1362 GCDataStore::WriteExportedData(TiXmlDocument * docP)
1363 {
1364 docP->SaveFile( m_outfileName.c_str() );
1365 }
1366
1367 void
ThrowLocusWithoutDataType(wxString locusName,wxString regionName) const1368 GCDataStore::ThrowLocusWithoutDataType(wxString locusName, wxString regionName) const
1369 {
1370 wxString msg = wxString::Format(gcerr::locusWithoutDataType,
1371 locusName.c_str(),
1372 regionName.c_str());
1373 gui_error g(msg);
1374 throw g;
1375 }
1376
1377 GCIndividual &
makeOrFetchInd(wxString indName,std::map<wxString,GCIndividual * > & nameDict,gcNameResolvedInfo & info) const1378 GCDataStore::makeOrFetchInd( wxString indName,
1379 std::map<wxString,GCIndividual*> & nameDict,
1380 gcNameResolvedInfo & info) const
1381 {
1382 if(nameDict.find(indName) == nameDict.end())
1383 {
1384 GCIndividual * ind = new GCIndividual(indName,info.GetRegionRef());
1385 info.AddIndividual(ind);
1386 nameDict[indName] = ind;
1387 }
1388 std::map<wxString,GCIndividual*>::iterator iter = nameDict.find(indName);
1389 assert(iter != nameDict.end());
1390
1391 GCIndividual & ind = *((*iter).second);
1392 return ind;
1393 }
1394
1395 bool
AddLocusData(GCIndividual & ind,const gcLocus & locus,GCParseSample & sample) const1396 GCDataStore::AddLocusData(GCIndividual & ind,
1397 const gcLocus & locus,
1398 GCParseSample & sample) const
1399 {
1400 size_t samples = sample.GetSequencesPerLabel();
1401 bool foundAnything = false;
1402 wxString sampleName = sample.GetLabel();
1403 for(size_t i=0; i < samples; i++)
1404 {
1405 if(samples > 1 || (sampleName == ind.GetName()))
1406 {
1407 // EWFIX.P4.BUG.564 -- need to preserve information about how we made
1408 // this name so we can do better error reporting
1409 sampleName = wxString::Format("%s_%d",sample.GetLabel().c_str(),(int)i);
1410 }
1411 ind.AddSample(sampleName,locus,&(sample.GetData(i)));
1412 foundAnything = true;
1413 }
1414 return foundAnything;
1415 }
1416
1417 bool
AddLocusData(GCIndividual & ind,const gcLocus & locus,GCParseSample & sample,const wxArrayString & sampleNames) const1418 GCDataStore::AddLocusData(GCIndividual & ind,
1419 const gcLocus & locus,
1420 GCParseSample & sample,
1421 const wxArrayString & sampleNames) const
1422 {
1423 size_t numNames = sampleNames.Count();
1424 size_t numSamples = sample.GetSequencesPerLabel();
1425 bool foundAnything = false;
1426 if((numNames != 0) && (numNames != numSamples))
1427 {
1428 // EWFIX.P4.BUG.564 -- use more data to make a better message
1429 throw gc_ind_wrong_sample_count(ind);
1430 }
1431 for(size_t i=0; i < numSamples; i++)
1432 {
1433 wxString sampleName = "";
1434 if(numNames != 0) sampleName = sampleNames[i];
1435 ind.AddSample(sampleName,locus,&(sample.GetData(i)));
1436 foundAnything = true;
1437 }
1438 return foundAnything;
1439 }
1440
1441 bool
FillExportInfo(gcNameResolvedInfo & info,gcPhaseInfo & phaseInfo) const1442 GCDataStore::FillExportInfo(gcNameResolvedInfo & info, gcPhaseInfo & phaseInfo) const
1443 {
1444 bool regionFoundAnything = false;
1445 std::map<wxString,GCIndividual*> nameDict;
1446
1447 size_t popId = info.GetPopRef().GetId();
1448 size_t regionId = info.GetRegionRef().GetId();
1449
1450 // get list of loci for region
1451 constObjVector loci = m_structures.GetConstDisplayableLociInMapOrderFor(regionId);
1452
1453 for(constObjVector::const_iterator lIter = loci.begin();
1454 lIter != loci.end(); lIter++)
1455 {
1456 const gcLocus * locusP = dynamic_cast<const gcLocus*>(*lIter);
1457 assert(locusP != NULL);
1458
1459 wxString locusName = locusP->GetName();
1460
1461 gcSpecificDataType dtype = locusP->GetDataType();
1462 if(dtype == sdatatype_NONE_SET)
1463 {
1464 ThrowLocusWithoutDataType(locusP->GetName(),info.GetRegionRef().GetName());
1465 }
1466
1467 size_t locusId = locusP->GetId();
1468 constBlockVector blocks = GetBlocks(popId,locusId);
1469 for(constBlockVector::const_iterator bIter = blocks.begin();
1470 bIter != blocks.end(); bIter++)
1471 {
1472 assert(*bIter != NULL);
1473 bool locusFoundAnything = false;
1474
1475 const GCParseBlock & block = **bIter;
1476
1477 const GCParseSamples & samples = block.GetSamples();
1478
1479 // EWFIX.P3 -- what I really need to do here is
1480 // to have an individual name and a sample name for each
1481 // member data item within each sample
1482 for(size_t i=0; i < samples.size(); i++)
1483 {
1484 GCParseSample & sample = *(samples[i]);
1485
1486 const wxString & thisName = sample.GetLabel();
1487 bool isInd = phaseInfo.HasIndividualRecord(thisName);
1488 bool isSam = phaseInfo.HasSampleRecord(thisName);
1489
1490 if( (isInd) && (isSam) )
1491 {
1492 const gcPhaseRecord & rec1 = phaseInfo.GetIndividualRecord(thisName);
1493 const gcPhaseRecord & rec2 = phaseInfo.GetSampleRecord(thisName);
1494 throw gc_both_individual_and_sample(thisName,rec1,rec2);
1495 }
1496
1497 if (isInd)
1498 {
1499 GCIndividual & ind = makeOrFetchInd(thisName,nameDict,info);
1500 const gcPhaseRecord & rec = phaseInfo.GetIndividualRecord(thisName);
1501
1502 // get phenotype names
1503 // find those that apply to this region info.GetRegionRef();
1504 // get geno resolutions for them
1505 const gcIdSet & phenoIds = rec.GetPhenotypeIds();
1506 const GCTraitInfoSet & traits = info.GetRegionRef().GetTraitInfoSet();
1507 for(gcIdSet::const_iterator i=phenoIds.begin(); i != phenoIds.end(); i++)
1508 {
1509 size_t phenotypeId = *i;
1510 for(GCTraitInfoSet::const_iterator j=traits.begin(); j != traits.end(); j++)
1511 {
1512 const gcTraitInfo & traitRef = GetStructures().GetTrait(*j);
1513 const gcPhenotype & phenoRef = GetStructures().GetPhenotype(phenotypeId);
1514 if(traitRef.HasPhenotype(phenoRef))
1515 {
1516 ind.AddPhenotype(phenoRef);
1517 }
1518 }
1519 }
1520
1521 if(rec.HasUnphased(locusName))
1522 {
1523 ind.AddPhase(*locusP,rec.GetUnphased(locusName));
1524 }
1525 if(rec.HasSamples())
1526 {
1527 const wxArrayString & sampleNames = rec.GetSamples();
1528 locusFoundAnything = AddLocusData(ind,*locusP,sample,sampleNames) || locusFoundAnything;
1529 }
1530 else
1531 {
1532 locusFoundAnything = AddLocusData(ind,*locusP,sample) || locusFoundAnything;
1533 }
1534 }
1535
1536 if (isSam)
1537 {
1538 const gcPhaseRecord & rec = phaseInfo.GetSampleRecord(thisName);
1539 wxString indName;
1540 if(rec.HasIndividual())
1541 {
1542 indName = rec.GetIndividual();
1543 }
1544 else
1545 {
1546 indName = gcdefault::createdIndividualPrefix;
1547 assert(rec.HasSamples());
1548 const wxArrayString & samples = rec.GetSamples();
1549 for(size_t i = 0; i < samples.Count(); i++)
1550 {
1551 indName = wxString::Format("%s:%s",
1552 indName.c_str(),
1553 samples[i].c_str());
1554 }
1555 }
1556 GCIndividual & ind = makeOrFetchInd(indName,nameDict,info);
1557 // EWFIX.P3 -- refactor -- appears above
1558 // get phenotype names
1559 // find those that apply to this region info.GetRegionRef();
1560 // get geno resolutions for them
1561 const gcIdSet & phenoIds = rec.GetPhenotypeIds();
1562 const GCTraitInfoSet & traits = info.GetRegionRef().GetTraitInfoSet();
1563 for(gcIdSet::const_iterator i=phenoIds.begin(); i != phenoIds.end(); i++)
1564 {
1565 size_t phenotypeId = *i;
1566 for(GCTraitInfoSet::const_iterator j=traits.begin(); j != traits.end(); j++)
1567 {
1568 const gcTraitInfo & traitRef = GetStructures().GetTrait(*j);
1569 const gcPhenotype & phenoRef = GetStructures().GetPhenotype(phenotypeId);
1570 if(traitRef.HasPhenotype(phenoRef))
1571 {
1572 ind.AddPhenotype(phenoRef);
1573 }
1574 }
1575 }
1576 locusFoundAnything = AddLocusData(ind,*locusP,sample) || locusFoundAnything;
1577 if(rec.HasUnphased(locusName))
1578 {
1579 ind.AddPhase(*locusP,rec.GetUnphased(locusName));
1580 }
1581 }
1582
1583 if(!isInd && !isSam)
1584 // doesn't have any phase information
1585 {
1586 GCIndividual & ind = makeOrFetchInd(thisName,nameDict,info);
1587 locusFoundAnything = AddLocusData(ind,*locusP,sample) || locusFoundAnything;
1588 }
1589 }
1590
1591 #if 0 // Potentially DEAD CODE (bobgian, Feb 2010)
1592 // At the moment, this cannot happen without
1593 // causing another error (either missing_pop_region or
1594 // missing_sample).
1595 if(!locusFoundAnything)
1596 // EWFIX.P3.BUG.511 -- eventually this restriction should
1597 // be relaxed, but at the moment we require all pop,locus
1598 // pairs have data
1599 {
1600 throw gc_data_missing_pop_locus(info.GetPopRef().GetName(),locusP->GetName());
1601 }
1602 #endif
1603 regionFoundAnything = regionFoundAnything || locusFoundAnything;
1604 }
1605 }
1606 if(!regionFoundAnything)
1607 // EWFIX.P3.BUG.511 -- eventually this restriction should
1608 // be relaxed, but at the moment we require all pop,locus
1609 // pairs have data
1610 {
1611 throw gc_data_missing_pop_region(info.GetPopRef().GetName(),info.GetRegionRef().GetName());
1612 }
1613 return regionFoundAnything;
1614 }
1615
1616 gcExportable
BuildExportableData() const1617 GCDataStore::BuildExportableData() const
1618 {
1619 bool foundAnything = false;
1620 gcExportable exportable;
1621
1622 constObjVector regions = m_structures.GetConstDisplayableRegions();
1623 constObjVector pops = m_structures.GetConstDisplayablePops();
1624
1625 gcPhaseInfo * phaseInfo = BuildPhaseInfo();
1626 assert(phaseInfo != NULL);
1627
1628 try
1629 {
1630 DiagnosePhaseInfoProblems(*phaseInfo);
1631
1632 for(constObjVector::const_iterator gIter = regions.begin();
1633 gIter != regions.end(); gIter++)
1634 {
1635 const gcRegion * regionP = dynamic_cast<const gcRegion*>(*gIter);
1636 assert(regionP != NULL);
1637 bool regionFoundAnything = false;
1638 for(constObjVector::const_iterator iter = pops.begin(); iter != pops.end(); iter++)
1639 {
1640 const gcPopulation * popP = dynamic_cast<const gcPopulation*>(*iter);
1641 assert(popP != NULL);
1642
1643 gcNameResolvedInfo * info = new gcNameResolvedInfo(*popP,*regionP);
1644 try
1645 {
1646 bool localFoundAnything = FillExportInfo(*info,*phaseInfo);
1647 foundAnything |= localFoundAnything;
1648 regionFoundAnything |= localFoundAnything;
1649 gcPopRegionPair infoPair(popP,regionP);
1650 assert(exportable.find(infoPair) == exportable.end());
1651 exportable[infoPair] = info;
1652 }
1653 catch(const gc_ex & e)
1654 {
1655 delete info;
1656 throw;
1657 }
1658 }
1659 if(!regionFoundAnything)
1660 {
1661 throw gui_error(wxString::Format(gcerr::regionNoData,regionP->GetName().c_str()));
1662 }
1663 }
1664 }
1665 catch(const gc_ex & e)
1666 {
1667 delete phaseInfo;
1668 throw;
1669 }
1670
1671 delete phaseInfo;
1672
1673 if(!foundAnything)
1674 {
1675 gui_error g(gcerr::noDataFound);
1676 throw g;
1677 }
1678
1679 // EWFIX.P3 -- we should gather info about ghost populations,
1680 // and other unexpected data gaps
1681
1682 return exportable;
1683 }
1684
1685 //____________________________________________________________________________________
1686