1 // $Id: gc_datastore_writebatch.cpp,v 1.10 2011/06/22 18:22:22 jmcgill Exp $
2
3 /*
4 Copyright 2002 Mary Kuhner, Jon Yamato, and Joseph Felsenstein
5
6 This software is distributed free of charge for non-commercial use
7 and is copyrighted. Of course, we do not guarantee that the software
8 works, and are not responsible for any damage you may cause or have.
9 */
10
11 #include <cassert>
12
13 #include "cnv_strings.h"
14 #include "gc_data.h"
15 #include "gc_datastore.h"
16 #include "gc_parse_block.h"
17 #include "gc_phase_info.h"
18 #include "gc_strings.h"
19 #include "tinyxml.h"
20 #include "wx/datetime.h"
21
22 //------------------------------------------------------------------------------------
23
24 TiXmlElement *
CmdExportIndividuals() const25 GCDataStore::CmdExportIndividuals() const
26 {
27 const stringToRecord & indRecords = m_phaseInfo.GetIndividualRecords();
28 if(indRecords.empty())
29 {
30 return NULL;
31 }
32 TiXmlElement * indsElem = new TiXmlElement(cnvstr::TAG_INDIVIDUALS.c_str());
33
34 for(stringToRecord::const_iterator i=indRecords.begin(); i != indRecords.end(); i++)
35 {
36 const wxString & iName = (*i).first;
37 const gcPhaseRecord & rec = (*i).second;
38
39 TiXmlElement * iElem = new TiXmlElement(cnvstr::TAG_INDIVIDUAL.c_str());
40 indsElem->LinkEndChild(iElem);
41
42 TiXmlElement * name = new TiXmlElement(cnvstr::TAG_NAME.c_str());
43 iElem->LinkEndChild(name);
44 TiXmlText * iNameText = new TiXmlText(iName.c_str());
45 name->LinkEndChild(iNameText);
46
47 const gcIdSet & phenoIds = rec.GetPhenotypeIds();
48 for(gcIdSet::const_iterator pIter = phenoIds.begin(); pIter != phenoIds.end(); pIter++)
49 {
50 const gcPhenotype & pheno = GetStructures().GetPhenotype(*pIter);
51 if(pheno.HasExplicitName())
52 {
53 TiXmlElement * phenoRefE = new TiXmlElement(cnvstr::TAG_HAS_PHENOTYPE.c_str());
54 iElem->LinkEndChild(phenoRefE);
55 TiXmlText * phenoNameText = new TiXmlText(pheno.GetName().c_str());
56 phenoRefE->LinkEndChild(phenoNameText);
57 }
58 else
59 {
60 iElem->LinkEndChild(CmdExportGenoReso(pheno));
61 }
62 }
63
64 const wxArrayString & sampleNames = rec.GetSamples();
65 for(size_t index=0; index < sampleNames.Count(); index++)
66 {
67 const wxString & sampleName = sampleNames[index];
68 TiXmlElement * sampleE = new TiXmlElement(cnvstr::TAG_SAMPLE.c_str());
69 iElem->LinkEndChild(sampleE);
70 TiXmlElement * nameE = new TiXmlElement(cnvstr::TAG_NAME.c_str());
71 sampleE->LinkEndChild(nameE);
72 TiXmlText * nameText = new TiXmlText(sampleName);
73 nameE->LinkEndChild(nameText);
74 }
75 }
76
77 return indsElem;
78 }
79
80 TiXmlElement *
CmdExportInfile(const GCFile & fileRef) const81 GCDataStore::CmdExportInfile(const GCFile& fileRef) const
82 {
83 TiXmlElement * fileE = new TiXmlElement(cnvstr::TAG_INFILE.c_str());
84
85 // file attributes
86 GCFileFormat fform = fileRef.GetFormat();
87 fileE->SetAttribute(cnvstr::ATTR_FORMAT.c_str(),ToWxString(fform).c_str());
88
89 gcGeneralDataType dtype;
90
91 if(GetStructures().HasParse(fileRef))
92 {
93 const GCParse & parseRef = GetStructures().GetParse(fileRef);
94 constBlockVector blocks = parseRef.GetBlocks();
95 if(!blocks.empty())
96 {
97 const GCParseBlock * blockP = blocks[0];
98 assert(blockP != NULL);
99 size_t locusId = GetStructures().GetLocusForBlock(blockP->GetId());
100 const gcLocus & locusRef = GetStructures().GetLocus(locusId);
101 dtype = locusRef.GetDataType();
102 }
103 else
104 {
105 dtype = parseRef.GetDataType();
106 }
107 }
108
109 if(dtype.size() != 1)
110 {
111 gcGeneralDataType dtype = fileRef.GetGeneralDataType();
112 }
113
114 if(dtype.size() != 1)
115 // put comment in to tell user what to do
116 {
117 TiXmlComment * comment = new TiXmlComment();
118 comment->SetValue(gcstr::instructionsMultipleDataTypes.c_str());
119 fileE->LinkEndChild(comment);
120
121 }
122 fileE->SetAttribute(cnvstr::ATTR_DATATYPE.c_str(),ToWxString(dtype).c_str());
123
124 GCInterleaving inter = fileRef.GetInterleaving();
125 if(inter == interleaving_MOOT)
126 // sequences fit on one line, so assume
127 // it's sequential
128 {
129 inter = interleaving_SEQUENTIAL;
130 }
131 fileE->SetAttribute(cnvstr::ATTR_SEQUENCEALIGNMENT.c_str(),ToWxString(inter));
132
133 // name
134 TiXmlElement * name = new TiXmlElement(cnvstr::TAG_NAME.c_str());
135 TiXmlText * fname = new TiXmlText(fileRef.GetName().c_str());
136 name->LinkEndChild(fname);
137 fileE->LinkEndChild(name);
138
139 const GCLocusMatcher & locMatch = GetStructures().GetLocusMatcher(fileRef);
140 loc_match locMatchType = locMatch.GetLocMatchType();
141
142 TiXmlElement * lMatch = new TiXmlElement(cnvstr::TAG_SEGMENTS_MATCHING.c_str());
143 fileE->LinkEndChild(lMatch);
144 lMatch->SetAttribute(cnvstr::ATTR_TYPE.c_str(),ToWxString(locMatchType).c_str());
145
146 if(locMatchType != locmatch_DEFAULT)
147 {
148 if(locMatchType == locmatch_SINGLE)
149 {
150 const wxArrayString & locNames = locMatch.GetLociNames();
151 TiXmlText * lname = new TiXmlText(locNames[0].c_str());
152 lMatch->LinkEndChild(lname);
153 }
154
155 if(locMatchType == locmatch_VECTOR)
156 {
157 const wxArrayString & locNames = locMatch.GetLociNames();
158 for(size_t i=0; i < locNames.Count(); i++)
159 {
160 TiXmlElement * locName = new TiXmlElement(cnvstr::TAG_SEGMENT_NAME.c_str());
161 lMatch->LinkEndChild(locName);
162 TiXmlText * lname = new TiXmlText(locNames[i].c_str());
163 locName->LinkEndChild(lname);
164 }
165 }
166 }
167
168 const GCPopMatcher & popMatch = GetStructures().GetPopMatcher(fileRef);
169 pop_match popMatchType = popMatch.GetPopMatchType();
170
171 TiXmlElement * pMatch = new TiXmlElement(cnvstr::TAG_POP_MATCHING.c_str());
172 fileE->LinkEndChild(pMatch);
173 pMatch->SetAttribute(cnvstr::ATTR_TYPE.c_str(),ToWxString(popMatchType).c_str());
174
175 if(popMatchType != popmatch_DEFAULT)
176 {
177
178 if(popMatchType == popmatch_SINGLE)
179 {
180 const wxArrayString & popNames = popMatch.GetPopNames();
181 TiXmlText * pname = new TiXmlText(popNames[0].c_str());
182 pMatch->LinkEndChild(pname);
183 }
184
185 if(popMatchType == popmatch_VECTOR)
186 {
187 const wxArrayString & popNames = popMatch.GetPopNames();
188 for(size_t i=0; i < popNames.Count(); i++)
189 {
190 TiXmlElement * popName = new TiXmlElement(cnvstr::TAG_POP_NAME.c_str());
191 pMatch->LinkEndChild(popName);
192 TiXmlText * pname = new TiXmlText(popNames[i].c_str());
193 popName->LinkEndChild(pname);
194 }
195 }
196 }
197
198 if(GetStructures().HasHapFileAdjacent(fileRef.GetId()))
199 {
200 size_t numAdj = GetStructures().GetHapFileAdjacent(fileRef.GetId());
201 TiXmlElement * adj = new TiXmlElement(cnvstr::TAG_INDIVIDUALS_FROM_SAMPLES.c_str());
202 adj->SetAttribute(cnvstr::ATTR_TYPE.c_str(),cnvstr::ATTR_VAL_BYADJACENCY.c_str());
203 fileE->LinkEndChild(adj);
204 TiXmlText * adjVal = new TiXmlText(wxString::Format("%d",(int)numAdj).c_str());
205 adj->LinkEndChild(adjVal);
206 }
207
208 return fileE;
209 }
210
211 TiXmlElement *
CmdExportLocus(const gcLocus & locusRef) const212 GCDataStore::CmdExportLocus(const gcLocus& locusRef) const
213 {
214 TiXmlElement * locE = new TiXmlElement(cnvstr::TAG_SEGMENT.c_str());
215 TiXmlText * name = new TiXmlText(locusRef.GetName().c_str());
216 locE->LinkEndChild(name);
217 return locE;
218 }
219
220 TiXmlElement *
CmdExportPhenotype(const gcPhenotype & pheno) const221 GCDataStore::CmdExportPhenotype(const gcPhenotype & pheno) const
222 {
223 TiXmlElement * phenoE = new TiXmlElement(cnvstr::TAG_PHENOTYPE.c_str());
224
225 // name
226 TiXmlElement * name = new TiXmlElement(cnvstr::TAG_NAME.c_str());
227 phenoE->LinkEndChild(name);
228 TiXmlText * nameText = new TiXmlText(pheno.GetName());
229 name->LinkEndChild(nameText);
230
231 // geno reso
232 phenoE->LinkEndChild(CmdExportGenoReso(pheno));
233 return phenoE;
234 }
235
236 TiXmlElement *
CmdExportGenoReso(const gcPhenotype & pheno) const237 GCDataStore::CmdExportGenoReso(const gcPhenotype & pheno) const
238 {
239 // geno reso
240 TiXmlElement * genoE = new TiXmlElement(cnvstr::TAG_GENO_RESOLUTIONS.c_str());
241
242 assert(pheno.HasTraitId());
243 const gcTraitInfo & trait = GetStructures().GetTrait(pheno.GetTraitId());
244 TiXmlElement * tName = new TiXmlElement(cnvstr::TAG_TRAIT_NAME.c_str());
245 genoE->LinkEndChild(tName);
246 TiXmlText * tNameText = new TiXmlText(trait.GetName());
247 tName->LinkEndChild(tNameText);
248
249 const std::vector<gcHapProbability> & hapProbs = pheno.GetHapProbabilities();
250 assert(!hapProbs.empty());
251 for(size_t i=0; i < hapProbs.size(); i++)
252 {
253 const gcHapProbability & hprob = hapProbs[i];
254 assert(hprob.HasPenetrance());
255
256 TiXmlElement * hapE = new TiXmlElement(cnvstr::TAG_HAPLOTYPES.c_str());
257 genoE->LinkEndChild(hapE);
258
259 TiXmlElement * pen = new TiXmlElement(cnvstr::TAG_PENETRANCE.c_str());
260 hapE->LinkEndChild(pen);
261 TiXmlText * penText = new TiXmlText(wxString::Format("%f",hprob.GetPenetrance()));
262 pen->LinkEndChild(penText);
263
264 wxString alleleString = " ";
265 const gcIdVec & alleleIds = hprob.GetAlleleIds();
266 for(gcIdVec::const_iterator iter = alleleIds.begin(); iter != alleleIds.end(); iter++)
267 {
268 alleleString += GetStructures().GetAllele(*iter).GetName();
269 alleleString += " ";
270 }
271 TiXmlElement * alleles = new TiXmlElement(cnvstr::TAG_ALLELES.c_str());
272 hapE->LinkEndChild(alleles);
273 TiXmlText * allelesText = new TiXmlText(alleleString);
274 alleles->LinkEndChild(allelesText);
275 }
276
277 return genoE;
278 }
279
280 TiXmlElement *
CmdExportPop(const gcPopulation & popRef) const281 GCDataStore::CmdExportPop(const gcPopulation & popRef) const
282 {
283 TiXmlElement * popE = new TiXmlElement(cnvstr::TAG_POPULATION.c_str());
284 TiXmlText * name = new TiXmlText(popRef.GetName().c_str());
285 popE->LinkEndChild(name);
286 return popE;
287 }
288
289 TiXmlElement *
CmdExportTrait(const gcTraitInfo & traitRef) const290 GCDataStore::CmdExportTrait(const gcTraitInfo & traitRef) const
291 {
292 TiXmlElement * traitE = new TiXmlElement(cnvstr::TAG_TRAIT_INFO.c_str());
293
294 // one name
295 TiXmlElement * traitName = new TiXmlElement(cnvstr::TAG_NAME.c_str());
296 traitE->LinkEndChild(traitName);
297 TiXmlText * nameText = new TiXmlText(traitRef.GetName().c_str());
298 traitName->LinkEndChild(nameText);
299
300 // many alleles
301 const gcIdSet & alleleIds = traitRef.GetAlleleIds();
302 for(gcIdSet::const_iterator i = alleleIds.begin(); i != alleleIds.end(); i++)
303 {
304 const gcTraitAllele & alleleRef = GetStructures().GetAllele(*i);
305 TiXmlElement * alleleElem = new TiXmlElement(cnvstr::TAG_ALLELE.c_str());
306 traitE->LinkEndChild(alleleElem);
307 TiXmlText * alleleText = new TiXmlText(alleleRef.GetName().c_str());
308 alleleElem->LinkEndChild(alleleText);
309 }
310
311 return traitE;
312 }
313
314 TiXmlElement *
CmdExportRegion(const gcRegion & regRef) const315 GCDataStore::CmdExportRegion(const gcRegion & regRef) const
316 {
317 TiXmlElement * regE = new TiXmlElement(cnvstr::TAG_REGION.c_str());
318
319 // name
320 TiXmlElement * name = new TiXmlElement(cnvstr::TAG_NAME.c_str());
321 TiXmlText * rname = new TiXmlText(regRef.GetName().c_str());
322 name->LinkEndChild(rname);
323 regE->LinkEndChild(name);
324
325 // effective pop size
326 if(regRef.HasEffectivePopulationSize())
327 {
328 TiXmlElement * effPop = new TiXmlElement(cnvstr::TAG_EFFECTIVE_POPSIZE.c_str());
329 TiXmlText * esize = new TiXmlText(wxString::Format("%f",regRef.GetEffectivePopulationSize()).c_str());
330 effPop->LinkEndChild(esize);
331 regE->LinkEndChild(effPop);
332 }
333
334 // spacing -- EWFIX -- may drop down to next level
335 TiXmlElement * spacing = new TiXmlElement(cnvstr::TAG_SEGMENTS.c_str());
336 regE->LinkEndChild(spacing);
337 gcIdVec segIds = GetStructures().GetLocusIdsForRegionByMapPosition(regRef.GetId());
338 for(gcIdVec::iterator i=segIds.begin(); i != segIds.end(); i++)
339 {
340 size_t locusId = *i;
341 const gcLocus & locusRef = GetStructures().GetLocus(locusId);
342 spacing->LinkEndChild(CmdExportSegment(locusRef));
343 }
344
345 // trait location
346 const GCTraitInfoSet & traits = regRef.GetTraitInfoSet();
347 for(GCTraitInfoSet::const_iterator i=traits.begin(); i != traits.end(); i++)
348 {
349 const gcTraitInfo & traitInfo = GetStructures().GetTrait(*i);
350 TiXmlElement * traitElem = new TiXmlElement(cnvstr::TAG_TRAIT_LOCATION.c_str());
351 regE->LinkEndChild(traitElem);
352 TiXmlElement * traitName = new TiXmlElement(cnvstr::TAG_TRAIT_NAME.c_str());
353 traitElem->LinkEndChild(traitName);
354 TiXmlText * traitNameText = new TiXmlText(traitInfo.GetName().c_str());
355 traitName->LinkEndChild(traitNameText);
356 }
357
358 return regE;
359 }
360
361 TiXmlElement *
CmdExportSegment(const gcLocus & locusRef) const362 GCDataStore::CmdExportSegment(const gcLocus& locusRef) const
363 {
364 TiXmlElement * locusE = new TiXmlElement(cnvstr::TAG_SEGMENT.c_str());
365
366 locusE->SetAttribute(cnvstr::ATTR_DATATYPE.c_str(),locusRef.GetDataTypeString().c_str());
367 if(locusRef.HasLinkedUserValue())
368 {
369 locusE->SetAttribute(cnvstr::ATTR_PROXIMITY.c_str(),locusRef.GetLinkedUserValueString().c_str());
370 }
371
372 TiXmlElement * nameE = new TiXmlElement(cnvstr::TAG_NAME.c_str());
373 TiXmlText * nameText = new TiXmlText(locusRef.GetName().c_str());
374 nameE->LinkEndChild(nameText);
375 locusE->LinkEndChild(nameE);
376
377 if(locusRef.HasNumMarkers())
378 {
379 TiXmlElement * markersE = new TiXmlElement(cnvstr::TAG_MARKERS.c_str());
380 TiXmlText * markersText = new TiXmlText(locusRef.GetNumMarkersString().c_str());
381 markersE->LinkEndChild(markersText);
382 locusE->LinkEndChild(markersE);
383 }
384
385 if(locusRef.HasOffset())
386 {
387 TiXmlElement * offsetE = new TiXmlElement(cnvstr::TAG_FIRST_POSITION_SCANNED.c_str());
388 TiXmlText * offsetText = new TiXmlText(locusRef.GetOffsetString().c_str());
389 offsetE->LinkEndChild(offsetText);
390 locusE->LinkEndChild(offsetE);
391 }
392
393 if(locusRef.HasMapPosition())
394 {
395 TiXmlElement * mapE = new TiXmlElement(cnvstr::TAG_MAP_POSITION.c_str());
396 TiXmlText * mapText = new TiXmlText(locusRef.GetMapPositionString().c_str());
397 mapE->LinkEndChild(mapText);
398 locusE->LinkEndChild(mapE);
399 }
400
401 if(locusRef.HasTotalLength())
402 {
403 TiXmlElement * lengthE = new TiXmlElement(cnvstr::TAG_SCANNED_LENGTH.c_str());
404 TiXmlText * lengthText = new TiXmlText(locusRef.GetTotalLengthString().c_str());
405 lengthE->LinkEndChild(lengthText);
406 locusE->LinkEndChild(lengthE);
407 }
408
409 if(locusRef.HasLocations())
410 {
411 TiXmlElement * locationsE = new TiXmlElement(cnvstr::TAG_SCANNED_DATA_POSITIONS.c_str());
412 TiXmlText * locationsText = new TiXmlText(locusRef.GetLocationsAsString().c_str());
413 locationsE->LinkEndChild(locationsText);
414 locusE->LinkEndChild(locationsE);
415 }
416
417 if(locusRef.HasUnphasedMarkers())
418 {
419 TiXmlElement * unphasedE = new TiXmlElement(cnvstr::TAG_UNRESOLVED_MARKERS.c_str());
420 TiXmlText * unphasedText = new TiXmlText(locusRef.GetUnphasedMarkersAsString().c_str());
421 unphasedE->LinkEndChild(unphasedText);
422 locusE->LinkEndChild(unphasedE);
423 }
424
425 return locusE;
426 }
427
428 TiXmlDocument *
ExportBatch() const429 GCDataStore::ExportBatch() const
430 {
431 TiXmlDocument * docP = new TiXmlDocument();
432 TiXmlDeclaration * decl = new TiXmlDeclaration( "1.0", "", "" );
433 docP->LinkEndChild( decl );
434
435 TiXmlComment * comment = new TiXmlComment();
436 wxDateTime now = wxDateTime::Now();
437 comment->SetValue(wxString::Format(gcstr::batchOutComment,now.Format().c_str()).c_str());
438 docP->LinkEndChild(comment);
439
440 TiXmlElement * top = new TiXmlElement( cnvstr::TAG_CONVERTER_CMD.c_str() );
441 docP->LinkEndChild( top );
442 top->SetAttribute(cnvstr::ATTR_VERSION.c_str(),VERSION);
443
444 constObjVector traits = GetStructures().GetConstTraits();
445 if(!traits.empty())
446 {
447 TiXmlElement * traitsElem = new TiXmlElement( cnvstr::TAG_TRAITS.c_str() );
448 top->LinkEndChild( traitsElem );
449
450 for(constObjVector::const_iterator iter = traits.begin(); iter != traits.end(); iter++)
451 {
452 const gcTraitInfo * traitP = dynamic_cast<const gcTraitInfo*>(*iter);
453 assert (traitP != NULL);
454 traitsElem->LinkEndChild(CmdExportTrait(*traitP));
455 }
456
457 const gcPhenoMap & phenos = GetStructures().GetPhenotypeMap();
458 for(gcPhenoMap::const_iterator i = phenos.begin(); i != phenos.end(); i++)
459 {
460 const gcPhenotype & phenoRef = (*i).second;
461 if(phenoRef.HasExplicitName())
462 {
463 traitsElem->LinkEndChild(CmdExportPhenotype(phenoRef));
464 }
465 }
466
467 }
468
469 constObjVector regs = GetStructures().GetConstDisplayableRegions();
470 if(!regs.empty())
471 {
472 TiXmlElement * regsElem = new TiXmlElement( cnvstr::TAG_REGIONS.c_str() );
473 top->LinkEndChild( regsElem );
474 for(constObjVector::const_iterator iter = regs.begin(); iter != regs.end(); iter++)
475 {
476 const gcRegion * regP = dynamic_cast<const gcRegion*>(*iter);
477 assert (regP != NULL);
478 regsElem->LinkEndChild(CmdExportRegion(*regP));
479 }
480 }
481
482 constObjVector pops = GetStructures().GetConstDisplayablePops();
483 if(!pops.empty())
484 {
485 TiXmlElement * popsElem = new TiXmlElement( cnvstr::TAG_POPULATIONS.c_str() );
486 top->LinkEndChild( popsElem );
487 for(constObjVector::const_iterator iter = pops.begin(); iter != pops.end(); iter++)
488 {
489 const gcPopulation * popP = dynamic_cast<const gcPopulation*>(*iter);
490 assert (popP != NULL);
491 popsElem->LinkEndChild(CmdExportPop(*popP));
492 }
493 }
494
495 TiXmlElement * individualsElem = CmdExportIndividuals();
496 if(individualsElem != NULL)
497 {
498 top->LinkEndChild( individualsElem );
499 }
500
501 const dataFileSet & files = GetDataFiles();
502 if(!files.empty())
503 {
504 TiXmlElement * infElem = new TiXmlElement( cnvstr::TAG_INFILES.c_str() );
505 top->LinkEndChild( infElem );
506 for(dataFileSet::const_iterator iter= files.begin(); iter != files.end(); iter++)
507 {
508 const GCFile & fileRef = *(*iter);
509 infElem->LinkEndChild(CmdExportInfile(fileRef));
510 }
511 }
512
513 if(!(m_outfileName.IsEmpty()))
514 {
515 TiXmlElement * outf = new TiXmlElement( cnvstr::TAG_OUTFILE.c_str() );
516 top->LinkEndChild( outf );
517 TiXmlText * outName = new TiXmlText(m_outfileName.c_str());
518 outf->LinkEndChild(outName);
519 }
520
521 if(!(m_commentString.IsEmpty()))
522 {
523 TiXmlElement * commentElem = new TiXmlElement( cnvstr::TAG_ADDCOMMENT.c_str() );
524 top->LinkEndChild( commentElem );
525 TiXmlText * commentText = new TiXmlText(m_commentString.c_str());
526 commentElem->LinkEndChild(commentText);
527 }
528
529 return docP;
530
531 }
532
533 void
WriteBatchFile(TiXmlDocument * docP,wxString fileName)534 GCDataStore::WriteBatchFile(TiXmlDocument * docP, wxString fileName)
535 {
536 docP->SaveFile( fileName.c_str());
537 }
538
539 //____________________________________________________________________________________
540