1 /* $Id: src_writer.cpp 632624 2021-06-03 17:38:23Z ivanov $
2 * ===========================================================================
3 *
4 * PUBLIC DOMAIN NOTICE
5 * National Center for Biotechnology Information
6 *
7 * This software/database is a "United States Government Work" under the
8 * terms of the United States Copyright Act. It was written as part of
9 * the author's official duties as a United States Government employee and
10 * thus cannot be copyrighted. This software/database is freely available
11 * to the public for use. The National Library of Medicine and the U.S.
12 * Government have not placed any restriction on its use or reproduction.
13 *
14 * Although all reasonable efforts have been taken to ensure the accuracy
15 * and reliability of the software and data, the NLM and the U.S.
16 * Government do not and cannot warrant the performance or results that
17 * may be obtained by using this software or data. The NLM and the U.S.
18 * Government disclaim all warranties, express or implied, including
19 * warranties of performance, merchantability or fitness for any particular
20 * purpose.
21 *
22 * Please cite the author in any work or product based on this material.
23 *
24 * ===========================================================================
25 *
26 * Authors: Frank Ludwig, Justin Foley
27 *
28 * File Description: Write source qualifiers
29 *
30 */
31
32 #include <ncbi_pch.hpp>
33
34 #include <objects/general/general_macros.hpp>
35 #include <objects/general/User_object.hpp>
36 #include <objects/general/Dbtag.hpp>
37 #include <objects/general/Object_id.hpp>
38 #include <objects/seqfeat/PCRPrimerSet.hpp>
39 #include <objects/seqfeat/SeqFeatXref.hpp>
40 #include <objects/seqfeat/PCRReactionSet.hpp>
41 #include <objects/seqfeat/PCRReaction.hpp>
42 #include <objects/seqfeat/PCRPrimer.hpp>
43
44 #include <objmgr/seqdesc_ci.hpp>
45 #include <objmgr/util/create_defline.hpp>
46
47 #include <objects/seqtable/SeqTable_column.hpp>
48 #include <objects/seqtable/SeqTable_single_data.hpp>
49 #include <objects/seqtable/SeqTable_multi_data.hpp>
50 #include <objects/seq/seq_macros.hpp>
51
52 #include <objects/pub/Pub.hpp>
53 #include <objects/pub/Pub_equiv.hpp>
54 #include <objects/pub/Pub_set.hpp>
55
56 #include <objects/biblio/biblio__.hpp>
57 #include <objects/biblio/Cit_sub.hpp>
58 #include <objects/biblio/Auth_list.hpp>
59 #include <objects/biblio/Affil.hpp>
60 #include <objects/general/Name_std.hpp>
61
62 #include <objects/general/Date_.hpp>
63
64 #include <objtools/writers/writer_exception.hpp>
65 #include <objtools/readers/message_listener.hpp>
66 #include <objtools/writers/src_writer.hpp>
67 #include <objmgr/util/sequence.hpp>
68
#include <memory>
#include <sstream>
70
71 BEGIN_NCBI_SCOPE
72 USING_SCOPE(objects);
73
74 CSrcWriter::HANDLERMAP CSrcWriter::sHandlerMap;
75 CSrcWriter::NAMEMAP CSrcWriter::sFieldnameToColname;
// ----------------------------------------------------------------------------
//  Default Fields:
// ----------------------------------------------------------------------------
//  Field names used to build CSrcWriter::sDefaultSrcCheckFields, in the order
//  listed here.
static const string arrDefaultSrcCheckFields[] = {
    // sequence identifiers
    "id", "gi",
    // organism
    "organism", "taxid",
    // vouchers and collections
    "specimen-voucher", "culture-collection", "bio-material",
    // strain level
    "strain", "sub-strain", "isolate",
    // infraspecific names
    "sub-species", "variety", "forma", "cultivar", "ecotype",
    "serotype", "serovar",
    "type-material", "old-name",
    // submitter information
    "author", "affil"
};
102
//  Field names used to build CSrcWriter::sDefaultSeqEntryFields, in the order
//  listed here. Same as the source-check list plus "localid" after "taxid".
static const string arrDefaultSeqEntryFields[] = {
    // sequence identifiers and organism
    "id", "gi",
    "organism", "taxid",
    "localid",
    // vouchers and collections
    "specimen-voucher", "culture-collection", "bio-material",
    // strain level
    "strain", "sub-strain", "isolate",
    // infraspecific names
    "sub-species", "variety", "forma", "cultivar", "ecotype",
    "serotype", "serovar",
    "type-material", "old-name",
    // submitter information
    "author", "affil"
};
127
128
129 const size_t countDefaultSrcCheckFields = sizeof(arrDefaultSrcCheckFields)/sizeof(string);
130
131 const CSrcWriter::FIELDS CSrcWriter::sDefaultSrcCheckFields(
132 arrDefaultSrcCheckFields, arrDefaultSrcCheckFields + countDefaultSrcCheckFields);
133
134 const CSrcWriter::FIELDS CSrcWriter::sAllSrcCheckFields(
135 xGetOrderedFieldNames(CSrcWriter::sDefaultSrcCheckFields));
136
137 const size_t countDefaultSeqEntryFields = sizeof(arrDefaultSeqEntryFields)/sizeof(string);
138
139 const CSrcWriter::FIELDS CSrcWriter::sDefaultSeqEntryFields(
140 arrDefaultSeqEntryFields, arrDefaultSeqEntryFields + countDefaultSeqEntryFields);
141
142 const CSrcWriter::FIELDS CSrcWriter::sAllSeqEntryFields(
143 xGetOrderedFieldNames(CSrcWriter::sDefaultSeqEntryFields));
144
145 // ----------------------------------------------------------------------------
WriteBioseqHandle(CBioseq_Handle bsh,const FIELDS & desiredFields,CNcbiOstream & out)146 bool CSrcWriter::WriteBioseqHandle(
147 CBioseq_Handle bsh,
148 const FIELDS& desiredFields,
149 CNcbiOstream& out)
150 // ----------------------------------------------------------------------------
151 {
152 FIELDS colNames = xProcessFieldNames(desiredFields);
153
154 if (!xGather(bsh, "", colNames)) {
155 return false;
156 }
157 if (!xFormatTabDelimited(colNames, out)) {
158 return false;
159 }
160 return true;
161 };
162
163
164 // ----------------------------------------------------------------------------
WriteBioseqHandles(const vector<pair<string,CBioseq_Handle>> & vecIdBsh,const FIELDS & desiredFields,CNcbiOstream & out,ILineErrorListener * pEC)165 bool CSrcWriter::WriteBioseqHandles(
166 const vector<pair<string,CBioseq_Handle> >& vecIdBsh,
167 const FIELDS& desiredFields,
168 CNcbiOstream& out,
169 ILineErrorListener* pEC)
170 // ----------------------------------------------------------------------------
171 {
172 typedef vector<pair<string,CBioseq_Handle> > HANDLES;
173 FIELDS colNames = xProcessFieldNames(desiredFields);
174
175 for (HANDLES::const_iterator it = vecIdBsh.begin(); it != vecIdBsh.end(); ++it) {
176 if (!xGather(it->second, it->first, colNames)) {
177 return false;
178 }
179 }
180
181 if (!xFormatTabDelimited(colNames,out)) {
182 return false;
183 }
184 return true;
185 };
186
187
188 // ----------------------------------------------------------------------------
WriteSeqEntry(const CSeq_entry & seqEntry,CScope & scope,CNcbiOstream & out,const bool nucsOnly)189 bool CSrcWriter::WriteSeqEntry(
190 const CSeq_entry& seqEntry,
191 CScope& scope,
192 CNcbiOstream& out,
193 const bool nucsOnly)
194 // ----------------------------------------------------------------------------
195 {
196 CSeq_entry_Handle handle = scope.AddTopLevelSeqEntry(seqEntry);
197 vector<pair<string,CBioseq_Handle> > vecIdBsh;
198 for (CBioseq_CI bci(handle); bci; ++bci) {
199 if(!nucsOnly || bci->IsNa()) {
200 vecIdBsh.push_back(make_pair("",*bci));
201 }
202 }
203 WriteBioseqHandles(vecIdBsh, sAllSeqEntryFields, out, 0);
204
205 return true;
206 }
207
208
209 // ----------------------------------------------------------------------------
xProcessFieldNames(const FIELDS & desiredFields)210 CSrcWriter::FIELDS CSrcWriter::xProcessFieldNames(
211 const FIELDS& desiredFields)
212 // ----------------------------------------------------------------------------
213 {
214 FIELDS colNames;
215 if (desiredFields[0] != "id") {
216 colNames.push_back("id");
217 }
218 for (FIELDS::const_iterator cit = desiredFields.begin();
219 cit != desiredFields.end(); ++cit) {
220 NAMEMAP::const_iterator mapIterator = sFieldnameToColname.find(xCompressFieldName(*cit));
221 if (mapIterator != sFieldnameToColname.end()) {
222 colNames.push_back(mapIterator->second);
223 } else {
224 colNames.push_back(*cit);
225 }
226 }
227 return colNames;
228 }
229
230
231 // ---------------------------------------------------------------------------
xPrepareTableColumn(const string & colName,const string & colTitle,const string & colDefault)232 void CSrcWriter::xPrepareTableColumn(
233 const string& colName,
234 const string& colTitle,
235 const string& colDefault)
236 // ---------------------------------------------------------------------------
237 {
238 COLUMNMAP::iterator it = mColnameToIndex.find(colName);
239 if (it == mColnameToIndex.end()) {
240 CRef<CSeqTable_column> pColumn(new CSeqTable_column());
241 pColumn->SetHeader().SetField_name(colName);
242 pColumn->SetHeader().SetTitle(colTitle);
243 pColumn->SetDefault().SetString(colDefault);
244 mColnameToIndex[colName] = mSrcTable->GetColumns().size();
245 mSrcTable->SetColumns().push_back(pColumn);
246 }
247 size_t index = mColnameToIndex[colName];
248 CSeqTable_column& column = *mSrcTable->SetColumns().at(index);
249 column.SetData().SetString();
250 while (column.GetData().GetString().size() < mSrcTable->GetNum_rows()) {
251 column.SetData().SetString().push_back(colDefault);
252 }
253 }
254
255
256 // ----------------------------------------------------------------------------
xInit()257 void CSrcWriter::xInit()
258 // ----------------------------------------------------------------------------
259 {
260 if (sHandlerMap.empty()) {
261 sHandlerMap["db"] = &CSrcWriter::xGatherDb;
262 sHandlerMap["taxid"] = &CSrcWriter::xGatherTaxonId;
263 sHandlerMap["division"] = &CSrcWriter::xGatherDivision;
264 sHandlerMap["genome"] = &CSrcWriter::xGatherGenome;
265 sHandlerMap["lineage"] = &CSrcWriter::xGatherOrgnameLineage;
266 sHandlerMap["common"] = &CSrcWriter::xGatherOrgCommon;
267 sHandlerMap["origin"] = &CSrcWriter::xGatherOrigin;
268 sHandlerMap["pcr-primers"] = &CSrcWriter::xGatherPcrPrimers;
269 sHandlerMap["organism"] = &CSrcWriter::xGatherTaxname;
270
271 sHandlerMap["note"] = &CSrcWriter::xGatherOrgModFeat;
272 sHandlerMap["subsource-note"] = &CSrcWriter::xGatherSubtypeFeat;
273
274 NAMELIST nameList = xGetOrgModSubtypeNames();
275 for (NAMELIST::const_iterator cit=nameList.begin();
276 cit != nameList.end(); ++cit) {
277 if (*cit != "other" && *cit != "common") {
278 sHandlerMap[*cit] = &CSrcWriter::xGatherOrgModFeat;
279 }
280 }
281
282 nameList = xGetSubSourceSubtypeNames();
283 for (NAMELIST::const_iterator cit=nameList.begin();
284 cit != nameList.end(); ++cit) {
285 if (*cit != "other") {
286 sHandlerMap[*cit] = &CSrcWriter::xGatherSubtypeFeat;
287 }
288 }
289 }
290
291
292 if (sFieldnameToColname.empty()) {
293 sFieldnameToColname["id"] = "id";
294 sFieldnameToColname["gi"] = "gi";
295 sFieldnameToColname["localid"] = "localid";
296 sFieldnameToColname["bankitid"] = "bankitid";
297 sFieldnameToColname["definition"] = "definition";
298 sFieldnameToColname["def"] = "definition";
299 sFieldnameToColname["defline"] = "definition";
300
301 sFieldnameToColname["author"] = "author";
302 sFieldnameToColname["affil"] = "affil";
303
304 sFieldnameToColname["db"] = "db";
305 sFieldnameToColname["org.db"] = "db";
306
307 sFieldnameToColname["taxid"] = "taxid";
308 sFieldnameToColname["div"] = "division";
309 sFieldnameToColname["division"] = "division";
310 sFieldnameToColname["genome"] = "genome";
311 sFieldnameToColname["lineage"] = "lineage";
312 sFieldnameToColname["common"] = "common";
313 sFieldnameToColname["org.common"] = "common";
314
315 sFieldnameToColname["origin"] = "origin";
316 sFieldnameToColname["pcrprimers"] = "pcr-primers";
317 sFieldnameToColname["organism"] = "organism";
318 sFieldnameToColname["taxname"] = "organism";
319 sFieldnameToColname["org.taxname"] = "organism";
320 sFieldnameToColname["org"] = "organism";
321
322 // OrgMod
323 sFieldnameToColname["note"] = "note";
324 sFieldnameToColname["orgmod.note"] = "note";
325
326 // Subsource
327 sFieldnameToColname["insertionseq"] = "insertion-seq";
328 sFieldnameToColname["plasmid"] = "plasmid";
329 sFieldnameToColname["transposon"] = "transposon";
330
331 sFieldnameToColname["subsourcenote"] = "subsource-note";
332 sFieldnameToColname["subsrcnote"] = "subsource-note";
333
334 NAMELIST nameList = xGetOrgModSubtypeNames();
335 for (NAMELIST::const_iterator cit=nameList.begin();
336 cit != nameList.end(); ++cit) {
337
338 if (*cit != "other") {
339 sFieldnameToColname[xCompressFieldName(*cit)] = *cit;
340 }
341 }
342
343 nameList = xGetSubSourceSubtypeNames();
344 for (NAMELIST::const_iterator cit=nameList.begin();
345 cit != nameList.end(); ++cit) {
346 if(*cit != "other") {
347 sFieldnameToColname[xCompressFieldName(*cit)] = *cit;
348 }
349 }
350
351 }
352
353
354 mSrcTable.Reset(new CSeq_table());
355 mSrcTable->SetNum_rows(0);
356 }
357
358
359 // ----------------------------------------------------------------------------
xGetOrderedFieldNames(const FIELDS & defaultFields)360 CSrcWriter::FIELDS CSrcWriter::xGetOrderedFieldNames(const FIELDS& defaultFields)
361 // ----------------------------------------------------------------------------
362 {
363 FIELDS orderedFields;
364 set<string> processedFields;
365
366 for (FIELDS::const_iterator cit=defaultFields.begin();
367 cit != defaultFields.end();
368 ++cit) {
369 string colName = *cit;
370 orderedFields.push_back(colName);
371 processedFields.insert(xCompressFieldName(colName));
372 }
373
374 FIELDS lexicalFields;
375 lexicalFields.push_back("organism");
376 lexicalFields.push_back("genome");
377 lexicalFields.push_back("pcr-primers");
378 lexicalFields.push_back("db");
379 lexicalFields.push_back("common");
380 lexicalFields.push_back("lineage");
381 lexicalFields.push_back("origin");
382 lexicalFields.push_back("note");
383 lexicalFields.push_back("subsource-note");
384 lexicalFields.push_back("division");
385 lexicalFields.push_back("definition");
386 lexicalFields.push_back("bankitid");
387
388 lexicalFields.push_back("author");
389 lexicalFields.push_back("affil");
390
391 NAMELIST nameList = xGetOrgModSubtypeNames();
392 for(NAMELIST::const_iterator cit=nameList.begin();
393 cit != nameList.end(); ++cit) {
394 if (*cit != "other" && *cit != "common") {
395 lexicalFields.push_back(*cit);
396 }
397 }
398
399 nameList = xGetSubSourceSubtypeNames();
400 for(NAMELIST::const_iterator cit=nameList.begin();
401 cit != nameList.end(); ++cit) {
402 if(*cit != "other") {
403 lexicalFields.push_back(*cit);
404 }
405 }
406
407 sort(lexicalFields.begin(), lexicalFields.end());
408
409 for (FIELDS::const_iterator cit = lexicalFields.begin();
410 cit != lexicalFields.end(); ++cit)
411 {
412 string compressed_name = xCompressFieldName(*cit);
413 if (processedFields.find(compressed_name) == processedFields.end()) {
414 orderedFields.push_back(*cit);
415 processedFields.insert(compressed_name);
416 }
417 }
418
419 return orderedFields;
420 }
421
422
423 // ----------------------------------------------------------------------------
xGetOrgModSubtypeNames()424 CSrcWriter::NAMELIST CSrcWriter::xGetOrgModSubtypeNames()
425 // ----------------------------------------------------------------------------
426 {
427 NAMELIST subtypeNames;
428
429 typedef const CEnumeratedTypeValues::TValues TVALUES;
430 TVALUES nameValPairs = COrgMod::ENUM_METHOD_NAME(ESubtype)()->GetValues();
431
432 for (TVALUES::const_iterator cit = nameValPairs.begin();
433 cit != nameValPairs.end(); ++cit) {
434 subtypeNames.push_back(cit->first);
435 }
436 return subtypeNames;
437 }
438
439
440 // ----------------------------------------------------------------------------
xGetSubSourceSubtypeNames()441 CSrcWriter::NAMELIST CSrcWriter::xGetSubSourceSubtypeNames()
442 // ----------------------------------------------------------------------------
443 {
444 NAMELIST subtypeNames;
445
446 typedef const CEnumeratedTypeValues::TValues TVALUES;
447 TVALUES nameValPairs = CSubSource::ENUM_METHOD_NAME(ESubtype)()->GetValues();
448
449 for (TVALUES::const_iterator cit = nameValPairs.begin();
450 cit != nameValPairs.end(); ++cit) {
451 subtypeNames.push_back(cit->first);
452 }
453 return subtypeNames;
454 }
455
456
457 // ----------------------------------------------------------------------------
xCompressFieldName(const string & fieldName)458 string CSrcWriter::xCompressFieldName(
459 const string& fieldName)
460 // ----------------------------------------------------------------------------
461 {
462 string name = NStr::TruncateSpaces(fieldName);
463 NStr::ToLower(name);
464 NStr::ReplaceInPlace(name,"\"","");
465 NStr::ReplaceInPlace(name,"-","");
466 NStr::ReplaceInPlace(name, "_", "");
467 NStr::ReplaceInPlace(name, " ", "");
468
469 return name;
470 }
471
472
473 // ----------------------------------------------------------------------------
xIsSubsourceTypeSuppressed(CSubSource::TSubtype subtype)474 bool CSrcWriter::xIsSubsourceTypeSuppressed(
475 CSubSource::TSubtype subtype)
476 // ----------------------------------------------------------------------------
477 {
478 if (CSubSource::IsDiscouraged(subtype)) {
479 return true;
480 }
481 return false;
482 }
483
484
485 // ----------------------------------------------------------------------------
xIsOrgmodTypeSuppressed(COrgMod::TSubtype subtype)486 bool CSrcWriter::xIsOrgmodTypeSuppressed(
487 COrgMod::TSubtype subtype)
488 // ----------------------------------------------------------------------------
489 {
490 if (COrgMod::eSubtype_old_name == subtype) {
491 return false;
492 }
493 if (COrgMod::IsDiscouraged(subtype)) {
494 return true;
495 }
496 return false;
497 }
498
499
500 // ----------------------------------------------------------------------------
xGetHandler(const string & fieldName)501 CSrcWriter::HANDLER CSrcWriter::xGetHandler(
502 const string& fieldName)
503 // ----------------------------------------------------------------------------
504 {
505 return sHandlerMap[fieldName];
506 }
507
508
509 // ----------------------------------------------------------------------------
xHandleSourceField(const CBioSource & src,const string & fieldName,ILineErrorListener * pEC)510 bool CSrcWriter::xHandleSourceField(
511 const CBioSource& src,
512 const string& fieldName,
513 ILineErrorListener* pEC)
514 // ----------------------------------------------------------------------------
515 {
516 HANDLER pHandler = xGetHandler(fieldName);
517
518 if (!pHandler) {
519 CSrcError* pE = CSrcError::Create(
520 ncbi::eDiag_Error,
521 "Unable to find handler for field \"" + fieldName + "\".");
522 pEC->PutError(*pE);
523 delete pE;
524 return false;
525 }
526
527 return (this->*pHandler)(src, fieldName, pEC);
528 }
529
530
531 // ----------------------------------------------------------------------------
xTryDefaultId(const string & id,ILineErrorListener *)532 bool CSrcWriter::xTryDefaultId(
533 const string& id,
534 ILineErrorListener*)
535 // ----------------------------------------------------------------------------
536 {
537 if (id.empty()) {
538 return false;
539 }
540
541 string displayName;
542 string colName;
543 if ( NStr::StringToNumeric<TIntId>(id, NStr::fConvErr_NoThrow)) {
544 colName = displayName = "gi";
545 } else {
546 colName = "id";
547 displayName = "accession";
548 }
549
550 const string defaultValue;
551 xPrepareTableColumn(colName, displayName, defaultValue);
552 xAppendColumnValue(colName, id);
553
554 mSrcTable->SetNum_rows(mSrcTable->GetNum_rows()+1);
555 return true;
556 }
557
558
559 // ----------------------------------------------------------------------------
xGather(CBioseq_Handle bsh,const string default_id,const FIELDS & desiredFields,ILineErrorListener *)560 bool CSrcWriter::xGather(
561 CBioseq_Handle bsh,
562 const string default_id,
563 const FIELDS& desiredFields,
564 ILineErrorListener*)
565 // ----------------------------------------------------------------------------
566 {
567 // for each of biosources we may create individual record
568 // with the same ID
569 bool wantGi = ( find(desiredFields.begin(), desiredFields.end(), "gi") != desiredFields.end() );
570 bool wantLocalId = ( find(desiredFields.begin(), desiredFields.end(), "localid") != desiredFields.end() );
571 bool wantBankitId = ( find(desiredFields.begin(), desiredFields.end(), "bankitid") != desiredFields.end() );
572 bool wantDef = ( find(desiredFields.begin(), desiredFields.end(), "definition") != desiredFields.end() );
573 bool wantAuthor = ( find(desiredFields.begin(), desiredFields.end(), "author") != desiredFields.end() );
574 bool wantAffil = ( find(desiredFields.begin(), desiredFields.end(), "affil") != desiredFields.end() );
575
576
577 if (!bsh) {
578 return xTryDefaultId(default_id);
579 }
580
581 if (!xGatherId(bsh) ||
582 (wantGi && !xGatherGi(bsh)) ||
583 (wantLocalId && !xGatherLocalId(bsh)) ||
584 (wantBankitId && !xGatherBankitId(bsh)) ||
585 (wantDef && !xGatherDefline(bsh))) {
586 return false; // Not sure if this is the correct logic.
587 // If any of the accession, GI, Local ID or definition are invalid/not found,
588 // the row is not counted.
589 }
590
591 if (wantAuthor || wantAffil) {
592 string auths;
593 string comma;
594 string affls;
595 const CCit_sub* latest_sub = 0;
596 const CDate* latest_date = 0;
597 for (CSeqdesc_CI pdit(bsh, CSeqdesc::e_Pub); pdit; ++pdit) {
598 const CPubdesc& pubdesc = pdit->GetPub();
599 if (pubdesc.IsSetPub()) {
600 const CPub_equiv& pep = pubdesc.GetPub();
601 if (pep.IsSet()) {
602 ITERATE ( CPub_equiv::Tdata, it, pep.Get()) {
603 if ((*it)->Which() == CPub::e_Sub) {
604 const CCit_sub& sub = (*it)->GetSub();
605 if (sub.IsSetDate()) {
606 const CDate& curr_date = sub.GetDate();
607 if (latest_date != 0) {
608 if (latest_date->Compare(curr_date) == CDate::eCompare_before) {
609 latest_sub = ⊂
610 latest_date = &curr_date;
611 }
612 } else {
613 latest_sub = ⊂
614 latest_date = &curr_date;
615 }
616 }
617 }
618 }
619 }
620 }
621 }
622 if (latest_sub != 0) {
623 if (latest_sub->IsSetAuthors()) {
624 const CAuth_list& authors = latest_sub->GetAuthors();
625 if (authors.IsSetNames()) {
626 const CAuth_list::TNames& names = authors.GetNames();
627 if (names.IsStd()) {
628 ITERATE (CAuth_list::TNames::TStd, it, names.GetStd()) {
629 const CAuthor& auth = **it;
630 if (auth.IsSetName()) {
631 const CPerson_id& pid = auth.GetName();
632 if (pid.IsName()) {
633 const CName_std& name = pid.GetName();
634 if (name.IsSetLast()) {
635 string nm;
636 if (name.IsSetInitials()) {
637 nm = name.GetInitials() + " ";
638 }
639 nm += name.GetLast();
640 auths += comma + nm;
641 }
642 comma = ", ";
643 } else if (pid.IsConsortium()) {
644 string cnsrt = pid.GetConsortium();
645 auths += comma + cnsrt;
646 comma = ", ";
647 }
648 }
649 }
650 }
651 }
652 if (authors.IsSetAffil()) {
653 const CAffil& affil = authors.GetAffil();
654 if (affil.IsStr()) {
655 affls = affil.GetStr();
656 } else if (affil.IsStd()) {
657 const CAffil::C_Std& std = affil.GetStd();
658 if (std.IsSetAffil()) {
659 affls = std.GetAffil();
660 }
661 }
662 }
663 }
664 }
665 if (wantAuthor) {
666 if (auths.length() > 0) {
667 static const string colName = "author";
668 static const string displayName = colName;
669 static const string defaultValue;
670
671 xPrepareTableColumn(colName, displayName, defaultValue);
672 xAppendColumnValue(colName, auths);
673 }
674 }
675 if (wantAffil) {
676 if (affls.length() > 0) {
677 static const string colName = "affil";
678 static const string displayName = colName;
679 static const string defaultValue;
680
681 xPrepareTableColumn(colName, displayName, defaultValue);
682 xAppendColumnValue(colName, affls);
683 }
684 }
685 }
686
687 int num_sources = 0;
688 for (CSeqdesc_CI sdit(bsh, CSeqdesc::e_Source); sdit; ++sdit) {
689 const CBioSource& src = sdit->GetSource();
690 for (FIELDS::const_iterator cit = desiredFields.begin();
691 cit != desiredFields.end(); ++cit) {
692 if (*cit == "id" || *cit == "gi" || *cit == "definition" || *cit == "localid" ||
693 *cit == "bankitid" || *cit == "author" || *cit == "affil") {
694 continue;
695 }
696 if (!xHandleSourceField(src, *cit)) {
697 return false;
698 }
699 }
700 ++num_sources;
701 mSrcTable->SetNum_rows(mSrcTable->GetNum_rows()+1); // Each source has its own row
702 }
703
704
705 if (num_sources == 0) {
706 mSrcTable->SetNum_rows(mSrcTable->GetNum_rows()+1);
707 }
708
709
710 return true;
711 }
712
713
714 // ----------------------------------------------------------------------------
xGatherId(CBioseq_Handle bsh,ILineErrorListener *)715 bool CSrcWriter::xGatherId(
716 CBioseq_Handle bsh,
717 ILineErrorListener*)
718 // ----------------------------------------------------------------------------
719 {
720 if (!bsh) {
721 return false;
722 }
723
724 string label;
725 CConstRef<CSeq_id> sid = bsh.GetSeqId();
726 label = sequence::GetAccessionForId(*sid, bsh.GetScope());
727
728 if (!label.empty()) {
729 const string colName = "id";
730 const string defaultValue;
731 xPrepareTableColumn(colName, "accession", defaultValue);
732 xAppendColumnValue(colName, label);
733 }
734 return true;
735 }
736
737
738 // ----------------------------------------------------------------------------
xGatherGi(CBioseq_Handle bsh,ILineErrorListener *)739 bool CSrcWriter::xGatherGi(
740 CBioseq_Handle bsh,
741 ILineErrorListener*)
742 // ----------------------------------------------------------------------------
743 {
744
745 if (!bsh) {
746 return false;
747 }
748
749 const string colName = "gi";
750 string label;
751
752 ITERATE( CBioseq_Handle::TId, it, bsh.GetId() ) {
753 if( it->IsGi() ){
754 it->GetSeqId()->GetLabel(&label, CSeq_id::eContent);
755 break;
756 }
757 }
758
759 if (!label.empty()) {
760 const string displayName = "gi";
761 const string defaultValue;
762 xPrepareTableColumn(colName, displayName, defaultValue);
763 xAppendColumnValue(colName, label);
764 }
765 return true;
766 }
767
768
769 // ----------------------------------------------------------------------------
xGetOriginalId(const CBioseq_Handle & bsh) const770 string CSrcWriter::xGetOriginalId(const CBioseq_Handle& bsh) const
771 // ----------------------------------------------------------------------------
772 {
773 const CBioseq_Handle::TDescr& descr= bsh.GetDescr();
774
775 FOR_EACH_SEQDESC_ON_SEQDESCR (it, descr) {
776 const CSeqdesc& desc = **it;
777 if ( !desc.IsUser() || !desc.GetUser().IsSetType() ) continue;
778 const CUser_object& usr = desc.GetUser();
779 const CObject_id& oi = usr.GetType();
780 if ( !oi.IsStr() ) continue;
781 const string& type = oi.GetStr();
782 if ( !NStr::EqualNocase(type, "OrginalID") && !NStr::EqualNocase(type, "OriginalID") ) continue;
783 FOR_EACH_USERFIELD_ON_USEROBJECT (uitr, usr) {
784 const CUser_field& fld = **uitr;
785 if ( FIELD_IS_SET_AND_IS(fld, Label, Str) ) {
786 const string &label_str = GET_FIELD(fld.GetLabel(), Str);
787 if ( !NStr::EqualNocase(label_str, "LocalId") ) continue;
788 if ( fld.IsSetData() && fld.GetData().IsStr() ) {
789 return fld.GetData().GetStr();
790 }
791 }
792 }
793 }
794
795 return "";
796 }
797
798
799 // ----------------------------------------------------------------------------
xGatherLocalId(CBioseq_Handle bsh,ILineErrorListener *)800 bool CSrcWriter::xGatherLocalId(
801 CBioseq_Handle bsh,
802 ILineErrorListener*)
803 // ----------------------------------------------------------------------------
804 {
805 if (!bsh) {
806 return true;
807 }
808
809 static const string colName = "localid";
810 static const string displayName = colName;
811 static const string defaultValue;
812
813 string local_id = xGetOriginalId(bsh);
814 if ( NStr::IsBlank(local_id) ) {
815 CConstRef<CSeq_id> seq_id = bsh.GetLocalIdOrNull();
816 if ( !seq_id ) {
817 return true;
818 }
819 seq_id->GetLabel(&local_id, CSeq_id::eContent);
820 if ( NStr::IsBlank(local_id) ) {
821 return true;
822 }
823 }
824
825 xPrepareTableColumn(colName, displayName, defaultValue);
826 xAppendColumnValue(colName, local_id);
827 return true;
828 }
829
830
831 // ----------------------------------------------------------------------------
xGatherBankitId(CBioseq_Handle bsh,ILineErrorListener *)832 bool CSrcWriter::xGatherBankitId(
833 CBioseq_Handle bsh,
834 ILineErrorListener*)
835 // ----------------------------------------------------------------------------
836 {
837 if (!bsh) {
838 return true;
839 }
840
841 static const string colName = "bankitid";
842 static const string displayName = colName;
843 static const string defaultValue;
844
845 stringstream bankitIdOstr;
846 ITERATE( CBioseq_Handle::TId, it, bsh.GetId() ) {
847 const auto& pId = it->GetSeqId();
848 if (!pId || !pId->IsGeneral()) {
849 continue;
850 }
851 const auto& general = pId->GetGeneral();
852 if (!general.IsSetDb() || general.GetDb() != "BankIt") {
853 continue;
854 }
855 if (!general.IsSetTag()) {
856 continue; // not enough to work with
857 }
858 bankitIdOstr << "BankIt";
859 general.GetTag().AsString(bankitIdOstr);
860 break;
861 }
862 string bankitId = bankitIdOstr.str();
863 if (!bankitId.empty()) {
864 xPrepareTableColumn(colName, displayName, defaultValue);
865 xAppendColumnValue(colName, bankitId);
866 }
867 return true;
868 }
869
870
xGatherDefline(CBioseq_Handle bsh,ILineErrorListener *)871 bool CSrcWriter::xGatherDefline(
872 CBioseq_Handle bsh,
873 ILineErrorListener*)
874 // ----------------------------------------------------------------------------
875 {
876 if (!bsh) {
877 return true;
878 }
879
880 static const string colName = "definition";
881 static const string displayName = colName;
882 static const string defaultValue;
883
884 string label = sequence::CDeflineGenerator().GenerateDefline(bsh);
885 if (label.empty()) {
886 return true;
887 }
888 xPrepareTableColumn(colName, displayName, defaultValue);
889 xAppendColumnValue(colName, label);
890 return true;
891 }
892
893
894 // ----------------------------------------------------------------------------
xGatherTaxname(const CBioSource & src,const string & colName,ILineErrorListener *)895 bool CSrcWriter::xGatherTaxname(
896 const CBioSource& src,
897 const string& colName,
898 ILineErrorListener*)
899 // ----------------------------------------------------------------------------
900 {
901 const string displayName = "organism";
902 const string defaultValue;
903
904 if (!src.IsSetTaxname()) {
905 return true;
906 }
907 string value = src.GetTaxname();
908 xPrepareTableColumn(colName, displayName, defaultValue);
909 xAppendColumnValue(colName, value);
910 return true;
911 }
912
913
914 // ----------------------------------------------------------------------------
xGatherOrgCommon(const CBioSource & src,const string & colName,ILineErrorListener *)915 bool CSrcWriter::xGatherOrgCommon(
916 const CBioSource& src,
917 const string& colName,
918 ILineErrorListener*)
919 // ----------------------------------------------------------------------------
920 {
921 const string displayName = "common";
922 const string defaultValue;
923
924 if (!src.IsSetOrg() || !src.GetOrg().IsSetCommon()) {
925 return true;
926 }
927 string value = src.GetOrg().GetCommon();
928 xPrepareTableColumn(colName, displayName, defaultValue);
929 xAppendColumnValue(colName, value);
930 return true;
931 }
932
933
934 // ----------------------------------------------------------------------------
xGatherOrgnameLineage(const CBioSource & src,const string & colName,ILineErrorListener *)935 bool CSrcWriter::xGatherOrgnameLineage(
936 const CBioSource& src,
937 const string& colName,
938 ILineErrorListener*)
939 // ----------------------------------------------------------------------------
940 {
941 const string displayName = "lineage";
942 const string defaultValue;
943
944 if (!src.IsSetOrg() || !src.GetOrg().IsSetOrgname()
945 || !src.GetOrg().GetOrgname().IsSetLineage()) {
946 return true;
947 }
948 string value = src.GetOrg().GetOrgname().GetLineage();
949 xPrepareTableColumn(colName, displayName, defaultValue);
950 xAppendColumnValue(colName, value);
951 return true;
952 }
953
954
955 // ----------------------------------------------------------------------------
xGatherDivision(const CBioSource & src,const string & colName,ILineErrorListener *)956 bool CSrcWriter::xGatherDivision(
957 const CBioSource& src,
958 const string& colName,
959 ILineErrorListener*)
960 // ----------------------------------------------------------------------------
961 {
962 const string displayName = "division";
963 const string defaultValue;
964
965 if (!src.IsSetOrg() || !src.GetOrg().IsSetDivision()) {
966 return true;
967 }
968 string value = src.GetOrg().GetDivision();
969 xPrepareTableColumn(colName, displayName, defaultValue);
970 xAppendColumnValue(colName, value);
971 return true;
972 }
973
974
975 // ----------------------------------------------------------------------------
xGatherGenome(const CBioSource & src,const string & colName,ILineErrorListener *)976 bool CSrcWriter::xGatherGenome(
977 const CBioSource& src,
978 const string& colName,
979 ILineErrorListener*)
980 // ----------------------------------------------------------------------------
981 {
982 const string displayName = "genome";
983 const string defaultValue;
984
985 if (!src.IsSetGenome()) {
986 return true;
987 }
988 string value = CBioSource::GetOrganelleByGenome(src.GetGenome());
989 xPrepareTableColumn(colName, displayName, defaultValue);
990 xAppendColumnValue(colName, value);
991 return true;
992 }
993
994
995 // ----------------------------------------------------------------------------
xGatherOrigin(const CBioSource & src,const string & colName,ILineErrorListener *)996 bool CSrcWriter::xGatherOrigin(
997 const CBioSource& src,
998 const string& colName,
999 ILineErrorListener*)
1000 // ----------------------------------------------------------------------------
1001 {
1002 const string displayName = "origin";
1003 const string defaultValue;
1004
1005 if (!src.IsSetOrigin()) {
1006 return true;
1007 }
1008 string value = CBioSource::GetStringFromOrigin(src.GetOrigin());
1009 xPrepareTableColumn(colName, displayName, defaultValue);
1010 xAppendColumnValue(colName, value);
1011 return true;
1012 }
1013
1014
1015 // ----------------------------------------------------------------------------
xGatherSubtypeFeat(const CBioSource & src,const string & colName,ILineErrorListener *)1016 bool CSrcWriter::xGatherSubtypeFeat(
1017 const CBioSource& src,
1018 const string& colName,
1019 ILineErrorListener*)
1020 // ----------------------------------------------------------------------------
1021 {
1022
1023
1024 if ( !src.IsSetSubtype() ) {
1025 return true;
1026 }
1027
1028 CSubSource::TSubtype subtype = CSubSource::GetSubtypeValue(colName,CSubSource::eVocabulary_raw);
1029
1030 if ( xIsSubsourceTypeSuppressed(subtype) ) {
1031 return true;
1032 }
1033
1034
1035 typedef list<CRef<CSubSource> > SUBSOURCES;
1036 const SUBSOURCES& subsources = src.GetSubtype();
1037
1038 string key = colName;
1039 int count = 0;
1040 for (SUBSOURCES::const_iterator cit = subsources.begin();
1041 cit != subsources.end(); ++cit) {
1042
1043 const CSubSource& subsrc = **cit;
1044 if (subsrc.GetSubtype() != subtype) {
1045 continue;
1046 }
1047
1048 if (count) {
1049 key = colName + "#" + NStr::IntToString(count+1);
1050 }
1051 ++count;
1052
1053 string value;
1054 if (subsrc.IsSetName()) {
1055 value = subsrc.GetName();
1056 }
1057 if (value.empty() && CSubSource::NeedsNoText(subsrc.GetSubtype())) {
1058 value = "true";
1059 }
1060 xPrepareTableColumn(key, key, "");
1061 xAppendColumnValue(key, value);
1062 }
1063 return true;
1064 }
1065
1066
1067 // ----------------------------------------------------------------------------
xGatherOrgModFeat(const CBioSource & src,const string & colName,ILineErrorListener *)1068 bool CSrcWriter::xGatherOrgModFeat(
1069 const CBioSource& src,
1070 const string& colName,
1071 ILineErrorListener*)
1072 // ----------------------------------------------------------------------------
1073 {
1074 if ( !src.IsSetOrgMod() ) {
1075 return true;
1076 }
1077
1078 COrgMod::TSubtype subtype = COrgMod::GetSubtypeValue(colName,COrgMod::eVocabulary_raw);
1079
1080 if ( xIsOrgmodTypeSuppressed(subtype) ) {
1081 return true;
1082 }
1083
1084 typedef list<CRef<COrgMod> > ORGMODS;
1085 const ORGMODS& orgmods = src.GetOrgname().GetMod();
1086
1087 string key = colName;
1088 int count = 0;
1089 for (ORGMODS::const_iterator cit = orgmods.begin();
1090 cit != orgmods.end(); ++cit) {
1091 const COrgMod& orgmod = **cit;
1092
1093 if (orgmod.GetSubtype() != subtype) {
1094 continue;
1095 }
1096
1097 if (count) {
1098 key = colName + "#" + NStr::IntToString(count+1);
1099 }
1100 ++count;
1101
1102 string value = orgmod.GetSubname();
1103 xPrepareTableColumn(key, key, "");
1104 xAppendColumnValue(key, value);
1105 }
1106 return true;
1107 }
1108
1109
1110 // ----------------------------------------------------------------------------
xGatherDb(const CBioSource & src,const string & colName,ILineErrorListener *)1111 bool CSrcWriter::xGatherDb(
1112 const CBioSource& src,
1113 const string& colName,
1114 ILineErrorListener*)
1115 // ----------------------------------------------------------------------------
1116 {
1117 static const string displayName = "db";
1118 static const string defaultValue;
1119
1120 if (!src.IsSetOrg() || !src.GetOrg().IsSetDb()) {
1121 return true;
1122 }
1123
1124 typedef vector< CRef< CDbtag > > DBTAGS;
1125 const DBTAGS& tags = src.GetOrg().GetDb();
1126 for (DBTAGS::const_iterator cit = tags.begin(); cit != tags.end(); ++cit) {
1127 const CDbtag& tag = **cit;
1128 if (!tag.IsSetDb() || tag.GetDb().empty() || !tag.IsSetTag()) {
1129 continue;
1130 }
1131 const CObject_id& objid = tag.GetTag();
1132 string dbtagStr;
1133 switch (objid.Which()) {
1134 default:
1135 break;
1136 case CObject_id::e_Str:
1137 if (objid.GetStr().empty()) {
1138 continue;
1139 }
1140 dbtagStr = objid.GetStr();
1141 break;
1142 case CObject_id::e_Id:
1143 dbtagStr = NStr::IntToString(objid.GetId());
1144 break;
1145 }
1146 string curColName = colName;
1147 string curDisplayName = displayName;
1148 curColName += tag.GetDb();
1149 curDisplayName += tag.GetDb();
1150 xPrepareTableColumn(curColName, curDisplayName, "");
1151 xAppendColumnValue(curColName, dbtagStr);
1152 }
1153 return true;
1154 }
1155
1156
1157 // ----------------------------------------------------------------------------
xGatherTaxonId(const CBioSource & src,const string & colName,ILineErrorListener *)1158 bool CSrcWriter::xGatherTaxonId(
1159 const CBioSource& src,
1160 const string& colName,
1161 ILineErrorListener*)
1162 // ----------------------------------------------------------------------------
1163 {
1164 static const string displayName = "taxid";
1165 static const string defaultValue;
1166
1167 if (!src.IsSetOrg() || !src.GetOrg().IsSetDb()) {
1168 return true;
1169 }
1170
1171 typedef vector< CRef< CDbtag > > DBTAGS;
1172 const DBTAGS& tags = src.GetOrg().GetDb();
1173 string taxonIdStr;
1174 for (DBTAGS::const_iterator cit = tags.begin(); cit != tags.end(); ++cit) {
1175 const CDbtag& tag = **cit;
1176 if (!tag.IsSetDb() || tag.GetDb() != "taxon") {
1177 continue;
1178 }
1179 const CObject_id& objid = tag.GetTag();
1180 switch (objid.Which()) {
1181 default:
1182 return false;
1183 case CObject_id::e_Str:
1184 if (objid.GetStr().empty()) {
1185 continue;
1186 }
1187 taxonIdStr = objid.GetStr();
1188 break;
1189 case CObject_id::e_Id:
1190 taxonIdStr = NStr::IntToString(objid.GetId());
1191 break;
1192 }
1193 break;
1194 }
1195 string curDisplayName = displayName;
1196 xPrepareTableColumn(colName, displayName, "");
1197 xAppendColumnValue(colName, taxonIdStr);
1198 return true;
1199 }
1200
1201
1202 // ----------------------------------------------------------------------------
xGatherPcrPrimers(const CBioSource & src,const string & colName,ILineErrorListener *)1203 bool CSrcWriter::xGatherPcrPrimers(
1204 const CBioSource& src,
1205 const string& colName,
1206 ILineErrorListener*)
1207 // ----------------------------------------------------------------------------
1208 {
1209 const string pcrPrimersFwdNames = "pcr-primers.names.fwd";
1210 const string pcrPrimersFwdSequences = "pcr-primers.sequences.fwd";
1211 const string pcrPrimersRevNames = "pcr-primers.names.reverse";
1212 const string pcrPrimersRevSequences = "pcr-primers.sequences.reverse";
1213
1214 unsigned int columnSetCounter = 0;
1215
1216 if (!src.IsSetPcr_primers()) {
1217 return true;
1218 }
1219 string fwdName, fwdSequence, revName, revSequence;
1220 const CPCRReactionSet& pcrset = src.GetPcr_primers();
1221
1222 typedef list<CRef<CPCRReaction> > REACTIONS;
1223 const REACTIONS& reactions = pcrset.Get();
1224 for (REACTIONS::const_iterator cit = reactions.begin();
1225 cit != reactions.end(); ++cit) {
1226 const CPCRReaction& reaction = **cit;
1227 if (reaction.IsSetForward()) {
1228 if (fwdName.empty()) {
1229 fwdName += ";";
1230 fwdSequence += ";";
1231 }
1232 fwdName += CSrcWriter::xPrimerSetNames(reaction.GetForward());
1233 fwdSequence += CSrcWriter::xPrimerSetSequences(reaction.GetForward());
1234 }
1235 if (reaction.IsSetReverse()) {
1236 if (revName.empty()) {
1237 revName += ";";
1238 revSequence += ";";
1239 }
1240 revName += CSrcWriter::xPrimerSetNames(reaction.GetReverse());
1241 revSequence += CSrcWriter::xPrimerSetSequences(reaction.GetReverse());
1242 }
1243 }
1244 string keyPcrPrimersFwdNames = pcrPrimersFwdNames;
1245 string keyPcrPrimersFwdSequences = pcrPrimersFwdSequences;
1246 string keyPcrPrimersRevNames = pcrPrimersRevNames;
1247 string keyPcrPrimersRevSequences = pcrPrimersRevSequences;
1248 if (columnSetCounter > 0) {
1249 keyPcrPrimersFwdNames += "#" + NStr::IntToString(columnSetCounter);
1250 keyPcrPrimersFwdSequences += "#" + NStr::IntToString(columnSetCounter);
1251 keyPcrPrimersRevNames += "#" + NStr::IntToString(columnSetCounter);
1252 keyPcrPrimersRevSequences += "#" + NStr::IntToString(columnSetCounter);
1253 }
1254 xPrepareTableColumn(
1255 keyPcrPrimersFwdNames, keyPcrPrimersFwdNames, "");
1256 xAppendColumnValue(keyPcrPrimersFwdNames, fwdName);
1257 xPrepareTableColumn(
1258 keyPcrPrimersFwdSequences, keyPcrPrimersFwdSequences, "");
1259 xAppendColumnValue(keyPcrPrimersFwdSequences, fwdSequence);
1260
1261 xPrepareTableColumn(
1262 keyPcrPrimersRevNames, keyPcrPrimersRevNames, "");
1263 xAppendColumnValue(keyPcrPrimersRevNames, revName);
1264 xPrepareTableColumn(
1265 keyPcrPrimersRevSequences, keyPcrPrimersRevSequences, "");
1266 xAppendColumnValue(keyPcrPrimersRevSequences, revSequence);
1267 return true;
1268 }
1269
1270
1271 // ----------------------------------------------------------------------------
xPrimerSetNames(const CPCRPrimerSet & pset)1272 string CSrcWriter::xPrimerSetNames(const CPCRPrimerSet& pset)
1273 // ----------------------------------------------------------------------------
1274 {
1275 string names;
1276 typedef list<CRef<CPCRPrimer> > PRIMERS;
1277 const PRIMERS& primers = pset.Get();
1278 for (PRIMERS::const_iterator cit = primers.begin();
1279 cit != primers.end(); ++cit) {
1280 const CPCRPrimer& primer = **cit;
1281 names += ",";
1282 if (primer.IsSetName()) {
1283 names += primer.GetName();
1284 }
1285 }
1286 return names.substr(1);
1287 }
1288
1289
1290 // ----------------------------------------------------------------------------
xPrimerSetSequences(const CPCRPrimerSet & pset)1291 string CSrcWriter::xPrimerSetSequences(const CPCRPrimerSet& pset)
1292 // ----------------------------------------------------------------------------
1293 {
1294 string sequences;
1295 typedef list<CRef<CPCRPrimer> > PRIMERS;
1296 const PRIMERS& primers = pset.Get();
1297 for (PRIMERS::const_iterator cit = primers.begin();
1298 cit != primers.end(); ++cit) {
1299 const CPCRPrimer& primer = **cit;
1300 sequences += ",";
1301 if (primer.IsSetSeq()) {
1302 sequences += primer.GetSeq();
1303 }
1304 }
1305 return sequences.substr(1);
1306 }
1307
1308
1309 // ----------------------------------------------------------------------------
xFormatTabDelimited(const FIELDS & colStubs,CNcbiOstream & out)1310 bool CSrcWriter::xFormatTabDelimited(
1311 const FIELDS& colStubs,
1312 CNcbiOstream& out)
1313 // ----------------------------------------------------------------------------
1314 {
1315 // Print columns in the order given in colStubs
1316 map<string,NAMELIST > ColstubToColnames;
1317 typedef map<string,NAMELIST > COLSTUBNAMESMAP;
1318
1319
1320 for (COLUMNMAP::const_iterator cit=mColnameToIndex.begin();
1321 cit != mColnameToIndex.end(); ++cit) {
1322 string colName = cit->first;
1323 string colStub = xGetColStub(colName);
1324 if (ColstubToColnames.find(colStub) == ColstubToColnames.end()) {
1325 ColstubToColnames[colStub] = NAMELIST(1,colName);
1326 } else {
1327 ColstubToColnames[colStub].push_back(colName);
1328 }
1329 }
1330
1331
1332 NAMELIST colNames;
1333 for (FIELDS::const_iterator cit = colStubs.begin();
1334 cit != colStubs.end(); ++cit) {
1335 COLSTUBNAMESMAP::iterator mapIter = ColstubToColnames.find(*cit);
1336 if (mapIter != ColstubToColnames.end()) {
1337 colNames.insert(colNames.end(), mapIter->second.begin(),
1338 mapIter->second.end());
1339 }
1340 }
1341
1342 // Write the output table
1343 for (NAMELIST::const_iterator cit = colNames.begin();
1344 cit != colNames.end(); ++cit) {
1345 const CSeqTable_column& column = mSrcTable->GetColumn(*cit);
1346 string displayName = column.GetHeader().GetTitle();
1347 out << displayName << CSrcWriter::mDelimiter;
1348 }
1349 out << '\n';
1350
1351
1352 unsigned int numRows = mSrcTable->GetNum_rows();
1353 for (unsigned int u=0; u < numRows; ++u) {
1354 for (NAMELIST::const_iterator cit = colNames.begin();
1355 cit != colNames.end(); ++cit) {
1356 const CSeqTable_column& column = mSrcTable->GetColumn(*cit);
1357 const string* pValue = column.GetStringPtr(u);
1358 bool needsQuotes = xValueNeedsQuoting(*pValue);
1359 if (needsQuotes) {
1360 out << "\"";
1361 }
1362 out << xDequotedValue(*pValue) << CSrcWriter::mDelimiter;
1363 if (needsQuotes) {
1364 out << "\"";
1365 }
1366 }
1367 out << '\n';
1368 }
1369 return true;
1370 }
1371
1372
1373 // ----------------------------------------------------------------------------
string CSrcWriter::xGetColStub(
    const string& colName)
//  Derive the stub of a column name:
//    - any name containing "pcr-primers" has the stub "pcr-primers";
//    - otherwise a name of the form stub#number has the part before the
//      first "#" as its stub;
//    - otherwise the name is its own stub.
// ----------------------------------------------------------------------------
{
    const bool isPcrPrimerColumn = (NStr::Find(colName, "pcr-primers") != NPOS);
    if (isPcrPrimerColumn) {
        return "pcr-primers";
    }

    const size_t hashPos = NStr::Find(colName, "#");
    if (hashPos == NPOS) {
        return colName;
    }
    return colName.substr(0, hashPos);
}
1390
1391
1392 // ----------------------------------------------------------------------------
xAppendColumnValue(const string & colName,const string & colValue)1393 void CSrcWriter::xAppendColumnValue(
1394 const string& colName,
1395 const string& colValue)
1396 // ----------------------------------------------------------------------------
1397 {
1398 size_t index = mColnameToIndex[colName];
1399 CSeqTable_column& column = *mSrcTable->SetColumns().at(index);
1400 column.SetData().SetString().push_back(colValue);
1401 }
1402
1403
1404 // ----------------------------------------------------------------------------
ValidateFields(const FIELDS & fields,ILineErrorListener * pEC)1405 bool CSrcWriter::ValidateFields(
1406 const FIELDS& fields,
1407 ILineErrorListener* pEC)
1408 // ----------------------------------------------------------------------------
1409 {
1410 for (FIELDS::const_iterator cit = fields.begin(); cit != fields.end(); ++cit) {
1411 string field = *cit;
1412 NAMEMAP::const_iterator mapIter = sFieldnameToColname.find(xCompressFieldName(field));
1413 if (mapIter == sFieldnameToColname.end()) {
1414 CSrcError* pE = CSrcError::Create(
1415 ncbi::eDiag_Error,
1416 "Field name \"" + field + "\" not recognized.");
1417 pEC->PutError(*pE);
1418 delete pE;
1419 return false;
1420 }
1421 }
1422 return true;
1423 }
1424
1425
1426 // ----------------------------------------------------------------------------
xValueNeedsQuoting(const string & value)1427 bool CSrcWriter::xValueNeedsQuoting(
1428 const string& value)
1429 // ----------------------------------------------------------------------------
1430 {
1431 return (value.find(mDelimiter) != string::npos);
1432 }
1433
1434
1435 // ----------------------------------------------------------------------------
string CSrcWriter::xDequotedValue(
    const string& value)
//  Return a copy of value in which every double quote character has been
//  replaced by two single quote characters, so that the result contains no
//  double quotes of its own.
// -----------------------------------------------------------------------------
{
    const string doubleQuote = "\"";
    const string twoSingleQuotes = "\'\'";
    return NStr::Replace(value, doubleQuote, twoSingleQuotes);
}
1443
1444
1445 // -----------------------------------------------------------------------------
CSrcError(EDiagSev severity,const string & message)1446 CSrcError::CSrcError(
1447 EDiagSev severity,
1448 const string& message):
1449 // -----------------------------------------------------------------------------
1450 CLineError(ILineError::eProblem_Unset, severity, "", 0,
1451 "", "", "", message, CLineError::TVecOfLines())
1452 {
1453 }
1454
1455
1456 // -----------------------------------------------------------------------------
Create(EDiagSev severity,const string & message)1457 CSrcError* CSrcError::Create(
1458 EDiagSev severity,
1459 const string& message)
1460 // -----------------------------------------------------------------------------
1461 {
1462 return new CSrcError(severity, message);
1463 }
1464
1465 END_NCBI_SCOPE
1466