1 /*  $Id: src_writer.cpp 632624 2021-06-03 17:38:23Z ivanov $
2  * ===========================================================================
3  *
4  *                            PUBLIC DOMAIN NOTICE
5  *               National Center for Biotechnology Information
6  *
7  *  This software/database is a "United States Government Work" under the
8  *  terms of the United States Copyright Act.  It was written as part of
9  *  the author's official duties as a United States Government employee and
10  *  thus cannot be copyrighted.  This software/database is freely available
11  *  to the public for use. The National Library of Medicine and the U.S.
12  *  Government have not placed any restriction on its use or reproduction.
13  *
14  *  Although all reasonable efforts have been taken to ensure the accuracy
15  *  and reliability of the software and data, the NLM and the U.S.
16  *  Government do not and cannot warrant the performance or results that
17  *  may be obtained by using this software or data. The NLM and the U.S.
18  *  Government disclaim all warranties, express or implied, including
19  *  warranties of performance, merchantability or fitness for any particular
20  *  purpose.
21  *
22  *  Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Authors:  Frank Ludwig, Justin Foley
27  *
28  * File Description:  Write source qualifiers
29  *
30  */
31 
32 #include <ncbi_pch.hpp>
33 
34 #include <objects/general/general_macros.hpp>
35 #include <objects/general/User_object.hpp>
36 #include <objects/general/Dbtag.hpp>
37 #include <objects/general/Object_id.hpp>
38 #include <objects/seqfeat/PCRPrimerSet.hpp>
39 #include <objects/seqfeat/SeqFeatXref.hpp>
40 #include <objects/seqfeat/PCRReactionSet.hpp>
41 #include <objects/seqfeat/PCRReaction.hpp>
42 #include <objects/seqfeat/PCRPrimer.hpp>
43 
44 #include <objmgr/seqdesc_ci.hpp>
45 #include <objmgr/util/create_defline.hpp>
46 
47 #include <objects/seqtable/SeqTable_column.hpp>
48 #include <objects/seqtable/SeqTable_single_data.hpp>
49 #include <objects/seqtable/SeqTable_multi_data.hpp>
50 #include <objects/seq/seq_macros.hpp>
51 
52 #include <objects/pub/Pub.hpp>
53 #include <objects/pub/Pub_equiv.hpp>
54 #include <objects/pub/Pub_set.hpp>
55 
56 #include <objects/biblio/biblio__.hpp>
57 #include <objects/biblio/Cit_sub.hpp>
58 #include <objects/biblio/Auth_list.hpp>
59 #include <objects/biblio/Affil.hpp>
60 #include <objects/general/Name_std.hpp>
61 
62 #include <objects/general/Date_.hpp>
63 
64 #include <objtools/writers/writer_exception.hpp>
65 #include <objtools/readers/message_listener.hpp>
66 #include <objtools/writers/src_writer.hpp>
67 #include <objmgr/util/sequence.hpp>
68 
69 #include <sstream>
70 
71 BEGIN_NCBI_SCOPE
72 USING_SCOPE(objects);
73 
74 CSrcWriter::HANDLERMAP CSrcWriter::sHandlerMap;
75 CSrcWriter::NAMEMAP CSrcWriter::sFieldnameToColname;
76 //  ----------------------------------------------------------------------------
77 //  Default Fields:
78 //  ----------------------------------------------------------------------------
79 static const string arrDefaultSrcCheckFields[] = {
80     "id",
81     "gi",
82     "organism",
83     "taxid",
84     "specimen-voucher",
85     "culture-collection",
86     "bio-material",
87     "strain",
88     "sub-strain",
89     "isolate",
90     "sub-species",
91     "variety",
92     "forma",
93     "cultivar",
94     "ecotype",
95     "serotype",
96     "serovar",
97     "type-material",
98     "old-name",
99     "author",
100     "affil"
101 };
102 
103 static const string arrDefaultSeqEntryFields[] = {
104     "id",
105     "gi",
106     "organism",
107     "taxid",
108     "localid",
109     "specimen-voucher",
110     "culture-collection",
111     "bio-material",
112     "strain",
113     "sub-strain",
114     "isolate",
115     "sub-species",
116     "variety",
117     "forma",
118     "cultivar",
119     "ecotype",
120     "serotype",
121     "serovar",
122     "type-material",
123     "old-name",
124     "author",
125     "affil"
126 };
127 
128 
129 const size_t countDefaultSrcCheckFields = sizeof(arrDefaultSrcCheckFields)/sizeof(string);
130 
131 const CSrcWriter::FIELDS CSrcWriter::sDefaultSrcCheckFields(
132         arrDefaultSrcCheckFields, arrDefaultSrcCheckFields + countDefaultSrcCheckFields);
133 
134 const CSrcWriter::FIELDS CSrcWriter::sAllSrcCheckFields(
135         xGetOrderedFieldNames(CSrcWriter::sDefaultSrcCheckFields));
136 
137 const size_t countDefaultSeqEntryFields = sizeof(arrDefaultSeqEntryFields)/sizeof(string);
138 
139 const CSrcWriter::FIELDS CSrcWriter::sDefaultSeqEntryFields(
140         arrDefaultSeqEntryFields, arrDefaultSeqEntryFields + countDefaultSeqEntryFields);
141 
142 const CSrcWriter::FIELDS CSrcWriter::sAllSeqEntryFields(
143         xGetOrderedFieldNames(CSrcWriter::sDefaultSeqEntryFields));
144 
145 //  ----------------------------------------------------------------------------
WriteBioseqHandle(CBioseq_Handle bsh,const FIELDS & desiredFields,CNcbiOstream & out)146 bool CSrcWriter::WriteBioseqHandle(
147         CBioseq_Handle bsh,
148         const FIELDS& desiredFields,
149         CNcbiOstream& out)
150 //  ----------------------------------------------------------------------------
151 {
152     FIELDS colNames = xProcessFieldNames(desiredFields);
153 
154     if (!xGather(bsh, "", colNames)) {
155         return false;
156     }
157     if (!xFormatTabDelimited(colNames, out)) {
158         return false;
159     }
160     return true;
161 };
162 
163 
164 //  ----------------------------------------------------------------------------
WriteBioseqHandles(const vector<pair<string,CBioseq_Handle>> & vecIdBsh,const FIELDS & desiredFields,CNcbiOstream & out,ILineErrorListener * pEC)165 bool CSrcWriter::WriteBioseqHandles(
166         const vector<pair<string,CBioseq_Handle> >& vecIdBsh,
167         const FIELDS& desiredFields,
168         CNcbiOstream& out,
169         ILineErrorListener* pEC)
170 //  ----------------------------------------------------------------------------
171 {
172     typedef vector<pair<string,CBioseq_Handle> > HANDLES;
173     FIELDS colNames = xProcessFieldNames(desiredFields);
174 
175     for (HANDLES::const_iterator it = vecIdBsh.begin(); it != vecIdBsh.end(); ++it) {
176         if (!xGather(it->second, it->first, colNames)) {
177             return false;
178         }
179     }
180 
181     if (!xFormatTabDelimited(colNames,out)) {
182         return false;
183     }
184     return true;
185 };
186 
187 
188 //  ----------------------------------------------------------------------------
WriteSeqEntry(const CSeq_entry & seqEntry,CScope & scope,CNcbiOstream & out,const bool nucsOnly)189 bool CSrcWriter::WriteSeqEntry(
190         const CSeq_entry& seqEntry,
191         CScope& scope,
192         CNcbiOstream& out,
193         const bool nucsOnly)
194 //  ----------------------------------------------------------------------------
195 {
196     CSeq_entry_Handle handle = scope.AddTopLevelSeqEntry(seqEntry);
197     vector<pair<string,CBioseq_Handle> > vecIdBsh;
198     for (CBioseq_CI bci(handle); bci; ++bci) {
199         if(!nucsOnly || bci->IsNa()) {
200             vecIdBsh.push_back(make_pair("",*bci));
201         }
202     }
203     WriteBioseqHandles(vecIdBsh, sAllSeqEntryFields, out, 0);
204 
205     return true;
206 }
207 
208 
209 //  ----------------------------------------------------------------------------
xProcessFieldNames(const FIELDS & desiredFields)210 CSrcWriter::FIELDS CSrcWriter::xProcessFieldNames(
211         const FIELDS& desiredFields)
212 //  ----------------------------------------------------------------------------
213 {
214     FIELDS colNames;
215     if (desiredFields[0] != "id") {
216         colNames.push_back("id");
217     }
218     for (FIELDS::const_iterator cit = desiredFields.begin();
219             cit != desiredFields.end();  ++cit)  {
220         NAMEMAP::const_iterator mapIterator = sFieldnameToColname.find(xCompressFieldName(*cit));
221         if (mapIterator != sFieldnameToColname.end()) {
222             colNames.push_back(mapIterator->second);
223         } else {
224             colNames.push_back(*cit);
225         }
226     }
227     return colNames;
228 }
229 
230 
231 //  ---------------------------------------------------------------------------
xPrepareTableColumn(const string & colName,const string & colTitle,const string & colDefault)232 void CSrcWriter::xPrepareTableColumn(
233         const string& colName,
234         const string& colTitle,
235         const string& colDefault)
236 //  ---------------------------------------------------------------------------
237 {
238     COLUMNMAP::iterator it = mColnameToIndex.find(colName);
239     if (it == mColnameToIndex.end()) {
240         CRef<CSeqTable_column> pColumn(new CSeqTable_column());
241         pColumn->SetHeader().SetField_name(colName);
242         pColumn->SetHeader().SetTitle(colTitle);
243         pColumn->SetDefault().SetString(colDefault);
244         mColnameToIndex[colName] = mSrcTable->GetColumns().size();
245         mSrcTable->SetColumns().push_back(pColumn);
246     }
247     size_t index = mColnameToIndex[colName];
248     CSeqTable_column& column = *mSrcTable->SetColumns().at(index);
249     column.SetData().SetString();
250     while (column.GetData().GetString().size() < mSrcTable->GetNum_rows()) {
251         column.SetData().SetString().push_back(colDefault);
252     }
253 }
254 
255 
256 //  ----------------------------------------------------------------------------
xInit()257 void CSrcWriter::xInit()
258     //  ----------------------------------------------------------------------------
259 {
260     if (sHandlerMap.empty()) {
261         sHandlerMap["db"] = &CSrcWriter::xGatherDb;
262         sHandlerMap["taxid"] = &CSrcWriter::xGatherTaxonId;
263         sHandlerMap["division"] = &CSrcWriter::xGatherDivision;
264         sHandlerMap["genome"] = &CSrcWriter::xGatherGenome;
265         sHandlerMap["lineage"] = &CSrcWriter::xGatherOrgnameLineage;
266         sHandlerMap["common"] = &CSrcWriter::xGatherOrgCommon;
267         sHandlerMap["origin"] = &CSrcWriter::xGatherOrigin;
268         sHandlerMap["pcr-primers"] = &CSrcWriter::xGatherPcrPrimers;
269         sHandlerMap["organism"] = &CSrcWriter::xGatherTaxname;
270 
271         sHandlerMap["note"] = &CSrcWriter::xGatherOrgModFeat;
272         sHandlerMap["subsource-note"] = &CSrcWriter::xGatherSubtypeFeat;
273 
274         NAMELIST nameList = xGetOrgModSubtypeNames();
275         for (NAMELIST::const_iterator cit=nameList.begin();
276                 cit != nameList.end(); ++cit) {
277             if (*cit != "other" && *cit != "common") {
278                 sHandlerMap[*cit] = &CSrcWriter::xGatherOrgModFeat;
279             }
280         }
281 
282         nameList = xGetSubSourceSubtypeNames();
283         for (NAMELIST::const_iterator cit=nameList.begin();
284                 cit != nameList.end(); ++cit) {
285             if (*cit != "other") {
286                 sHandlerMap[*cit] = &CSrcWriter::xGatherSubtypeFeat;
287             }
288         }
289     }
290 
291 
292     if (sFieldnameToColname.empty()) {
293         sFieldnameToColname["id"] = "id";
294         sFieldnameToColname["gi"] = "gi";
295         sFieldnameToColname["localid"] = "localid";
296         sFieldnameToColname["bankitid"] = "bankitid";
297         sFieldnameToColname["definition"] = "definition";
298         sFieldnameToColname["def"] = "definition";
299         sFieldnameToColname["defline"] = "definition";
300 
301         sFieldnameToColname["author"] = "author";
302         sFieldnameToColname["affil"] = "affil";
303 
304         sFieldnameToColname["db"] = "db";
305         sFieldnameToColname["org.db"] = "db";
306 
307         sFieldnameToColname["taxid"] = "taxid";
308         sFieldnameToColname["div"] = "division";
309         sFieldnameToColname["division"] = "division";
310         sFieldnameToColname["genome"] = "genome";
311         sFieldnameToColname["lineage"] = "lineage";
312         sFieldnameToColname["common"] = "common";
313         sFieldnameToColname["org.common"] = "common";
314 
315         sFieldnameToColname["origin"] = "origin";
316         sFieldnameToColname["pcrprimers"] = "pcr-primers";
317         sFieldnameToColname["organism"] = "organism";
318         sFieldnameToColname["taxname"] = "organism";
319         sFieldnameToColname["org.taxname"] = "organism";
320         sFieldnameToColname["org"] = "organism";
321 
322         // OrgMod
323         sFieldnameToColname["note"] = "note";
324         sFieldnameToColname["orgmod.note"] = "note";
325 
326         // Subsource
327         sFieldnameToColname["insertionseq"] = "insertion-seq";
328         sFieldnameToColname["plasmid"] = "plasmid";
329         sFieldnameToColname["transposon"] = "transposon";
330 
331         sFieldnameToColname["subsourcenote"] = "subsource-note";
332         sFieldnameToColname["subsrcnote"] = "subsource-note";
333 
334         NAMELIST nameList = xGetOrgModSubtypeNames();
335         for (NAMELIST::const_iterator cit=nameList.begin();
336                 cit != nameList.end();  ++cit) {
337 
338             if (*cit != "other") {
339                 sFieldnameToColname[xCompressFieldName(*cit)] = *cit;
340             }
341         }
342 
343         nameList = xGetSubSourceSubtypeNames();
344         for (NAMELIST::const_iterator cit=nameList.begin();
345                 cit != nameList.end();  ++cit) {
346             if(*cit != "other") {
347                 sFieldnameToColname[xCompressFieldName(*cit)] = *cit;
348             }
349         }
350 
351     }
352 
353 
354     mSrcTable.Reset(new CSeq_table());
355     mSrcTable->SetNum_rows(0);
356 }
357 
358 
359 //  ----------------------------------------------------------------------------
xGetOrderedFieldNames(const FIELDS & defaultFields)360 CSrcWriter::FIELDS CSrcWriter::xGetOrderedFieldNames(const FIELDS& defaultFields)
361     //  ----------------------------------------------------------------------------
362 {
363     FIELDS orderedFields;
364     set<string> processedFields;
365 
366     for (FIELDS::const_iterator cit=defaultFields.begin();
367             cit != defaultFields.end();
368             ++cit) {
369         string colName = *cit;
370         orderedFields.push_back(colName);
371         processedFields.insert(xCompressFieldName(colName));
372     }
373 
374     FIELDS lexicalFields;
375     lexicalFields.push_back("organism");
376     lexicalFields.push_back("genome");
377     lexicalFields.push_back("pcr-primers");
378     lexicalFields.push_back("db");
379     lexicalFields.push_back("common");
380     lexicalFields.push_back("lineage");
381     lexicalFields.push_back("origin");
382     lexicalFields.push_back("note");
383     lexicalFields.push_back("subsource-note");
384     lexicalFields.push_back("division");
385     lexicalFields.push_back("definition");
386     lexicalFields.push_back("bankitid");
387 
388     lexicalFields.push_back("author");
389     lexicalFields.push_back("affil");
390 
391     NAMELIST nameList = xGetOrgModSubtypeNames();
392     for(NAMELIST::const_iterator cit=nameList.begin();
393             cit != nameList.end();  ++cit) {
394         if (*cit != "other" && *cit != "common") {
395             lexicalFields.push_back(*cit);
396         }
397     }
398 
399     nameList = xGetSubSourceSubtypeNames();
400     for(NAMELIST::const_iterator cit=nameList.begin();
401             cit != nameList.end();  ++cit) {
402         if(*cit != "other") {
403             lexicalFields.push_back(*cit);
404         }
405     }
406 
407     sort(lexicalFields.begin(), lexicalFields.end());
408 
409     for (FIELDS::const_iterator cit = lexicalFields.begin();
410             cit != lexicalFields.end();  ++cit)
411     {
412         string compressed_name = xCompressFieldName(*cit);
413         if (processedFields.find(compressed_name) == processedFields.end()) {
414             orderedFields.push_back(*cit);
415             processedFields.insert(compressed_name);
416         }
417     }
418 
419     return orderedFields;
420 }
421 
422 
423 //  ----------------------------------------------------------------------------
xGetOrgModSubtypeNames()424 CSrcWriter::NAMELIST CSrcWriter::xGetOrgModSubtypeNames()
425     //  ----------------------------------------------------------------------------
426 {
427     NAMELIST subtypeNames;
428 
429     typedef const CEnumeratedTypeValues::TValues TVALUES;
430     TVALUES nameValPairs = COrgMod::ENUM_METHOD_NAME(ESubtype)()->GetValues();
431 
432     for (TVALUES::const_iterator cit = nameValPairs.begin();
433             cit != nameValPairs.end();  ++cit) {
434         subtypeNames.push_back(cit->first);
435     }
436     return subtypeNames;
437 }
438 
439 
440 //  ----------------------------------------------------------------------------
xGetSubSourceSubtypeNames()441 CSrcWriter::NAMELIST CSrcWriter::xGetSubSourceSubtypeNames()
442     //  ----------------------------------------------------------------------------
443 {
444     NAMELIST subtypeNames;
445 
446     typedef const CEnumeratedTypeValues::TValues TVALUES;
447     TVALUES nameValPairs = CSubSource::ENUM_METHOD_NAME(ESubtype)()->GetValues();
448 
449     for (TVALUES::const_iterator cit = nameValPairs.begin();
450             cit != nameValPairs.end();  ++cit) {
451         subtypeNames.push_back(cit->first);
452     }
453     return subtypeNames;
454 }
455 
456 
457 //  ----------------------------------------------------------------------------
// Reduce a field name to the canonical form used as a lookup key: surrounding
// whitespace trimmed, lower-cased, and with every double quote, hyphen,
// underscore and space removed.
string CSrcWriter::xCompressFieldName(
        const string& fieldName)
//  ----------------------------------------------------------------------------
{
    string compressed = NStr::TruncateSpaces(fieldName);
    NStr::ToLower(compressed);

    // Characters stripped from the name, in the order they are removed.
    const char* const kDropped[] = { "\"", "-", "_", " " };
    for (const char* token : kDropped) {
        NStr::ReplaceInPlace(compressed, token, "");
    }

    return compressed;
}
471 
472 
473 //  ----------------------------------------------------------------------------
xIsSubsourceTypeSuppressed(CSubSource::TSubtype subtype)474 bool CSrcWriter::xIsSubsourceTypeSuppressed(
475         CSubSource::TSubtype subtype)
476 //  ----------------------------------------------------------------------------
477 {
478     if (CSubSource::IsDiscouraged(subtype)) {
479         return true;
480     }
481     return false;
482 }
483 
484 
485 //  ----------------------------------------------------------------------------
xIsOrgmodTypeSuppressed(COrgMod::TSubtype subtype)486 bool CSrcWriter::xIsOrgmodTypeSuppressed(
487         COrgMod::TSubtype subtype)
488 //  ----------------------------------------------------------------------------
489 {
490     if (COrgMod::eSubtype_old_name == subtype) {
491         return false;
492     }
493     if (COrgMod::IsDiscouraged(subtype)) {
494         return true;
495     }
496     return false;
497 }
498 
499 
500 //  ----------------------------------------------------------------------------
xGetHandler(const string & fieldName)501 CSrcWriter::HANDLER CSrcWriter::xGetHandler(
502         const string& fieldName)
503 //  ----------------------------------------------------------------------------
504 {
505     return sHandlerMap[fieldName];
506 }
507 
508 
509 //  ----------------------------------------------------------------------------
xHandleSourceField(const CBioSource & src,const string & fieldName,ILineErrorListener * pEC)510 bool CSrcWriter::xHandleSourceField(
511         const CBioSource& src,
512         const string& fieldName,
513         ILineErrorListener* pEC)
514 //  ----------------------------------------------------------------------------
515 {
516     HANDLER pHandler = xGetHandler(fieldName);
517 
518     if (!pHandler) {
519         CSrcError* pE = CSrcError::Create(
520                 ncbi::eDiag_Error,
521                 "Unable to find handler for field \"" + fieldName + "\".");
522         pEC->PutError(*pE);
523         delete pE;
524         return false;
525     }
526 
527     return (this->*pHandler)(src, fieldName, pEC);
528 }
529 
530 
531 //  ----------------------------------------------------------------------------
xTryDefaultId(const string & id,ILineErrorListener *)532 bool CSrcWriter::xTryDefaultId(
533         const string& id,
534         ILineErrorListener*)
535 //  ----------------------------------------------------------------------------
536 {
537     if (id.empty()) {
538         return false;
539     }
540 
541     string displayName;
542     string colName;
543     if ( NStr::StringToNumeric<TIntId>(id, NStr::fConvErr_NoThrow)) {
544         colName = displayName = "gi";
545     } else {
546         colName = "id";
547         displayName = "accession";
548     }
549 
550     const string defaultValue;
551     xPrepareTableColumn(colName, displayName, defaultValue);
552     xAppendColumnValue(colName, id);
553 
554     mSrcTable->SetNum_rows(mSrcTable->GetNum_rows()+1);
555     return true;
556 }
557 
558 
559 //  ----------------------------------------------------------------------------
xGather(CBioseq_Handle bsh,const string default_id,const FIELDS & desiredFields,ILineErrorListener *)560 bool CSrcWriter::xGather(
561         CBioseq_Handle bsh,
562         const string default_id,
563         const FIELDS& desiredFields,
564         ILineErrorListener*)
565 //  ----------------------------------------------------------------------------
566 {
567     // for each of biosources we may create individual record
568     // with the same ID
569     bool wantGi = ( find(desiredFields.begin(), desiredFields.end(), "gi") != desiredFields.end() );
570     bool wantLocalId = ( find(desiredFields.begin(), desiredFields.end(), "localid") != desiredFields.end() );
571     bool wantBankitId = ( find(desiredFields.begin(), desiredFields.end(), "bankitid") != desiredFields.end() );
572     bool wantDef = ( find(desiredFields.begin(), desiredFields.end(), "definition") != desiredFields.end() );
573     bool wantAuthor = ( find(desiredFields.begin(), desiredFields.end(), "author") != desiredFields.end() );
574     bool wantAffil = ( find(desiredFields.begin(), desiredFields.end(), "affil") != desiredFields.end() );
575 
576 
577     if (!bsh) {
578         return xTryDefaultId(default_id);
579     }
580 
581     if (!xGatherId(bsh) ||
582         (wantGi && !xGatherGi(bsh)) ||
583         (wantLocalId && !xGatherLocalId(bsh)) ||
584         (wantBankitId && !xGatherBankitId(bsh)) ||
585         (wantDef && !xGatherDefline(bsh))) {
586         return false; // Not sure if this is the correct logic.
587                       // If any of the accession, GI, Local ID or definition are invalid/not found,
588                       // the row is not counted.
589     }
590 
591     if (wantAuthor || wantAffil) {
592         string auths;
593         string comma;
594         string affls;
595         const CCit_sub* latest_sub = 0;
596         const CDate* latest_date = 0;
597         for (CSeqdesc_CI pdit(bsh, CSeqdesc::e_Pub); pdit; ++pdit) {
598             const CPubdesc& pubdesc = pdit->GetPub();
599             if (pubdesc.IsSetPub()) {
600                 const CPub_equiv& pep = pubdesc.GetPub();
601                 if (pep.IsSet()) {
602                     ITERATE ( CPub_equiv::Tdata, it, pep.Get()) {
603                         if ((*it)->Which() == CPub::e_Sub) {
604                             const CCit_sub& sub = (*it)->GetSub();
605                             if (sub.IsSetDate()) {
606                                 const CDate& curr_date = sub.GetDate();
607                                 if (latest_date != 0) {
608                                     if (latest_date->Compare(curr_date) == CDate::eCompare_before) {
609                                         latest_sub = &sub;
610                                         latest_date = &curr_date;
611                                     }
612                                 } else {
613                                     latest_sub = &sub;
614                                     latest_date = &curr_date;
615                                 }
616                             }
617                         }
618                     }
619                 }
620             }
621         }
622         if (latest_sub != 0) {
623            if (latest_sub->IsSetAuthors()) {
624               const CAuth_list& authors = latest_sub->GetAuthors();
625               if (authors.IsSetNames()) {
626                   const CAuth_list::TNames& names = authors.GetNames();
627                   if (names.IsStd()) {
628                       ITERATE (CAuth_list::TNames::TStd, it, names.GetStd()) {
629                           const CAuthor& auth = **it;
630                           if (auth.IsSetName()) {
631                               const CPerson_id& pid = auth.GetName();
632                               if (pid.IsName()) {
633                                   const CName_std& name = pid.GetName();
634                                   if (name.IsSetLast()) {
635                                       string nm;
636                                       if (name.IsSetInitials()) {
637                                           nm = name.GetInitials() + " ";
638                                       }
639                                       nm += name.GetLast();
640                                       auths += comma + nm;
641                                   }
642                                   comma = ", ";
643                               } else if (pid.IsConsortium()) {
644                                   string cnsrt = pid.GetConsortium();
645                                   auths += comma + cnsrt;
646                                   comma = ", ";
647                               }
648                           }
649                       }
650                   }
651               }
652               if (authors.IsSetAffil()) {
653                   const CAffil& affil = authors.GetAffil();
654                   if (affil.IsStr()) {
655                       affls = affil.GetStr();
656                   } else if (affil.IsStd()) {
657                       const CAffil::C_Std& std = affil.GetStd();
658                       if (std.IsSetAffil()) {
659                           affls = std.GetAffil();
660                       }
661                   }
662               }
663           }
664         }
665         if (wantAuthor) {
666             if (auths.length() > 0) {
667                 static const string colName = "author";
668                 static const string displayName = colName;
669                 static const string defaultValue;
670 
671                 xPrepareTableColumn(colName, displayName, defaultValue);
672                 xAppendColumnValue(colName, auths);
673             }
674         }
675         if (wantAffil) {
676             if (affls.length() > 0) {
677                 static const string colName = "affil";
678                 static const string displayName = colName;
679                 static const string defaultValue;
680 
681                 xPrepareTableColumn(colName, displayName, defaultValue);
682                 xAppendColumnValue(colName, affls);
683             }
684         }
685     }
686 
687     int num_sources = 0;
688     for (CSeqdesc_CI sdit(bsh, CSeqdesc::e_Source); sdit; ++sdit) {
689         const CBioSource& src = sdit->GetSource();
690         for (FIELDS::const_iterator cit = desiredFields.begin();
691                 cit != desiredFields.end(); ++cit) {
692             if (*cit == "id" || *cit == "gi" || *cit == "definition" || *cit == "localid" ||
693                 *cit == "bankitid" || *cit == "author" || *cit == "affil") {
694                 continue;
695             }
696             if (!xHandleSourceField(src, *cit)) {
697                 return false;
698             }
699         }
700         ++num_sources;
701         mSrcTable->SetNum_rows(mSrcTable->GetNum_rows()+1); // Each source has its own row
702     }
703 
704 
705     if (num_sources == 0) {
706         mSrcTable->SetNum_rows(mSrcTable->GetNum_rows()+1);
707     }
708 
709 
710     return true;
711 }
712 
713 
714 //  ----------------------------------------------------------------------------
xGatherId(CBioseq_Handle bsh,ILineErrorListener *)715 bool CSrcWriter::xGatherId(
716         CBioseq_Handle bsh,
717         ILineErrorListener*)
718 //  ----------------------------------------------------------------------------
719 {
720     if (!bsh) {
721         return false;
722     }
723 
724     string label;
725     CConstRef<CSeq_id> sid = bsh.GetSeqId();
726     label = sequence::GetAccessionForId(*sid, bsh.GetScope());
727 
728     if (!label.empty()) {
729         const string colName = "id";
730         const string defaultValue;
731         xPrepareTableColumn(colName, "accession", defaultValue);
732         xAppendColumnValue(colName, label);
733     }
734     return true;
735 }
736 
737 
738 //  ----------------------------------------------------------------------------
xGatherGi(CBioseq_Handle bsh,ILineErrorListener *)739 bool CSrcWriter::xGatherGi(
740         CBioseq_Handle bsh,
741         ILineErrorListener*)
742 //  ----------------------------------------------------------------------------
743 {
744 
745     if (!bsh) {
746         return false;
747     }
748 
749     const string colName = "gi";
750     string label;
751 
752     ITERATE( CBioseq_Handle::TId, it, bsh.GetId() ) {
753         if( it->IsGi() ){
754             it->GetSeqId()->GetLabel(&label, CSeq_id::eContent);
755             break;
756         }
757     }
758 
759     if (!label.empty()) {
760         const string displayName = "gi";
761         const string defaultValue;
762         xPrepareTableColumn(colName, displayName, defaultValue);
763         xAppendColumnValue(colName, label);
764     }
765     return true;
766 }
767 
768 
769 //  ----------------------------------------------------------------------------
xGetOriginalId(const CBioseq_Handle & bsh) const770 string CSrcWriter::xGetOriginalId(const CBioseq_Handle& bsh) const
771 //  ----------------------------------------------------------------------------
772 {
773     const CBioseq_Handle::TDescr& descr= bsh.GetDescr();
774 
775     FOR_EACH_SEQDESC_ON_SEQDESCR (it, descr) {
776         const CSeqdesc& desc = **it;
777         if ( !desc.IsUser() || !desc.GetUser().IsSetType() ) continue;
778         const CUser_object& usr = desc.GetUser();
779         const CObject_id& oi = usr.GetType();
780         if ( !oi.IsStr() ) continue;
781         const string& type = oi.GetStr();
782         if ( !NStr::EqualNocase(type, "OrginalID") && !NStr::EqualNocase(type, "OriginalID") ) continue;
783         FOR_EACH_USERFIELD_ON_USEROBJECT (uitr, usr) {
784             const CUser_field& fld = **uitr;
785             if ( FIELD_IS_SET_AND_IS(fld, Label, Str) ) {
786                 const string &label_str = GET_FIELD(fld.GetLabel(), Str);
787                 if ( !NStr::EqualNocase(label_str, "LocalId") ) continue;
788                 if ( fld.IsSetData() && fld.GetData().IsStr() ) {
789                     return fld.GetData().GetStr();
790                 }
791             }
792         }
793     }
794 
795     return "";
796 }
797 
798 
799 //  ----------------------------------------------------------------------------
xGatherLocalId(CBioseq_Handle bsh,ILineErrorListener *)800 bool CSrcWriter::xGatherLocalId(
801         CBioseq_Handle bsh,
802         ILineErrorListener*)
803 //  ----------------------------------------------------------------------------
804 {
805     if (!bsh) {
806         return true;
807     }
808 
809     static const string colName = "localid";
810     static const string displayName = colName;
811     static const string defaultValue;
812 
813     string local_id = xGetOriginalId(bsh);
814     if ( NStr::IsBlank(local_id) ) {
815         CConstRef<CSeq_id> seq_id = bsh.GetLocalIdOrNull();
816         if ( !seq_id ) {
817             return true;
818         }
819         seq_id->GetLabel(&local_id, CSeq_id::eContent);
820         if ( NStr::IsBlank(local_id) ) {
821             return true;
822         }
823     }
824 
825     xPrepareTableColumn(colName, displayName, defaultValue);
826     xAppendColumnValue(colName, local_id);
827     return true;
828 }
829 
830 
831 //  ----------------------------------------------------------------------------
xGatherBankitId(CBioseq_Handle bsh,ILineErrorListener *)832 bool CSrcWriter::xGatherBankitId(
833         CBioseq_Handle bsh,
834         ILineErrorListener*)
835 //  ----------------------------------------------------------------------------
836 {
837     if (!bsh) {
838         return true;
839     }
840 
841     static const string colName = "bankitid";
842     static const string displayName = colName;
843     static const string defaultValue;
844 
845     stringstream bankitIdOstr;
846     ITERATE( CBioseq_Handle::TId, it, bsh.GetId() ) {
847         const auto& pId = it->GetSeqId();
848         if (!pId  ||  !pId->IsGeneral()) {
849             continue;
850         }
851         const auto& general = pId->GetGeneral();
852         if (!general.IsSetDb()  ||  general.GetDb() != "BankIt") {
853             continue;
854         }
855         if (!general.IsSetTag()) {
856             continue; // not enough to work with
857         }
858         bankitIdOstr << "BankIt";
859         general.GetTag().AsString(bankitIdOstr);
860         break;
861     }
862     string bankitId = bankitIdOstr.str();
863     if (!bankitId.empty()) {
864         xPrepareTableColumn(colName, displayName, defaultValue);
865         xAppendColumnValue(colName, bankitId);
866     }
867     return true;
868 }
869 
870 
xGatherDefline(CBioseq_Handle bsh,ILineErrorListener *)871 bool CSrcWriter::xGatherDefline(
872         CBioseq_Handle bsh,
873         ILineErrorListener*)
874 //  ----------------------------------------------------------------------------
875 {
876     if (!bsh) {
877         return true;
878     }
879 
880     static const string colName = "definition";
881     static const string displayName = colName;
882     static const string defaultValue;
883 
884     string label = sequence::CDeflineGenerator().GenerateDefline(bsh);
885     if (label.empty()) {
886         return true;
887     }
888     xPrepareTableColumn(colName, displayName, defaultValue);
889     xAppendColumnValue(colName, label);
890     return true;
891 }
892 
893 
894 //  ----------------------------------------------------------------------------
xGatherTaxname(const CBioSource & src,const string & colName,ILineErrorListener *)895 bool CSrcWriter::xGatherTaxname(
896         const CBioSource& src,
897         const string& colName,
898         ILineErrorListener*)
899 //  ----------------------------------------------------------------------------
900 {
901     const string displayName = "organism";
902     const string defaultValue;
903 
904     if (!src.IsSetTaxname()) {
905         return true;
906     }
907     string value = src.GetTaxname();
908     xPrepareTableColumn(colName, displayName, defaultValue);
909     xAppendColumnValue(colName, value);
910     return true;
911 }
912 
913 
914 //  ----------------------------------------------------------------------------
xGatherOrgCommon(const CBioSource & src,const string & colName,ILineErrorListener *)915 bool CSrcWriter::xGatherOrgCommon(
916         const CBioSource& src,
917         const string& colName,
918         ILineErrorListener*)
919 //  ----------------------------------------------------------------------------
920 {
921     const string displayName = "common";
922     const string defaultValue;
923 
924     if (!src.IsSetOrg()  ||  !src.GetOrg().IsSetCommon()) {
925         return true;
926     }
927     string value = src.GetOrg().GetCommon();
928     xPrepareTableColumn(colName, displayName, defaultValue);
929     xAppendColumnValue(colName, value);
930     return true;
931 }
932 
933 
934 //  ----------------------------------------------------------------------------
xGatherOrgnameLineage(const CBioSource & src,const string & colName,ILineErrorListener *)935 bool CSrcWriter::xGatherOrgnameLineage(
936         const CBioSource& src,
937         const string& colName,
938         ILineErrorListener*)
939 //  ----------------------------------------------------------------------------
940 {
941     const string displayName = "lineage";
942     const string defaultValue;
943 
944     if (!src.IsSetOrg()  ||  !src.GetOrg().IsSetOrgname()
945             ||  !src.GetOrg().GetOrgname().IsSetLineage()) {
946         return true;
947     }
948     string value = src.GetOrg().GetOrgname().GetLineage();
949     xPrepareTableColumn(colName, displayName, defaultValue);
950     xAppendColumnValue(colName, value);
951     return true;
952 }
953 
954 
955 //  ----------------------------------------------------------------------------
xGatherDivision(const CBioSource & src,const string & colName,ILineErrorListener *)956 bool CSrcWriter::xGatherDivision(
957         const CBioSource& src,
958         const string& colName,
959         ILineErrorListener*)
960 //  ----------------------------------------------------------------------------
961 {
962     const string displayName = "division";
963     const string defaultValue;
964 
965     if (!src.IsSetOrg()  ||  !src.GetOrg().IsSetDivision()) {
966         return true;
967     }
968     string value = src.GetOrg().GetDivision();
969     xPrepareTableColumn(colName, displayName, defaultValue);
970     xAppendColumnValue(colName, value);
971     return true;
972 }
973 
974 
975 //  ----------------------------------------------------------------------------
xGatherGenome(const CBioSource & src,const string & colName,ILineErrorListener *)976 bool CSrcWriter::xGatherGenome(
977         const CBioSource& src,
978         const string& colName,
979         ILineErrorListener*)
980 //  ----------------------------------------------------------------------------
981 {
982     const string displayName = "genome";
983     const string defaultValue;
984 
985     if (!src.IsSetGenome()) {
986         return true;
987     }
988     string value = CBioSource::GetOrganelleByGenome(src.GetGenome());
989     xPrepareTableColumn(colName, displayName, defaultValue);
990     xAppendColumnValue(colName, value);
991     return true;
992 }
993 
994 
995 //  ----------------------------------------------------------------------------
xGatherOrigin(const CBioSource & src,const string & colName,ILineErrorListener *)996 bool CSrcWriter::xGatherOrigin(
997         const CBioSource& src,
998         const string& colName,
999         ILineErrorListener*)
1000 //  ----------------------------------------------------------------------------
1001 {
1002     const string displayName = "origin";
1003     const string defaultValue;
1004 
1005     if (!src.IsSetOrigin()) {
1006         return true;
1007     }
1008     string value = CBioSource::GetStringFromOrigin(src.GetOrigin());
1009     xPrepareTableColumn(colName, displayName, defaultValue);
1010     xAppendColumnValue(colName, value);
1011     return true;
1012 }
1013 
1014 
1015 //  ----------------------------------------------------------------------------
xGatherSubtypeFeat(const CBioSource & src,const string & colName,ILineErrorListener *)1016 bool CSrcWriter::xGatherSubtypeFeat(
1017         const CBioSource& src,
1018         const string& colName,
1019         ILineErrorListener*)
1020 //  ----------------------------------------------------------------------------
1021 {
1022 
1023 
1024     if ( !src.IsSetSubtype() ) {
1025         return true;
1026     }
1027 
1028     CSubSource::TSubtype subtype = CSubSource::GetSubtypeValue(colName,CSubSource::eVocabulary_raw);
1029 
1030     if ( xIsSubsourceTypeSuppressed(subtype) ) {
1031         return true;
1032     }
1033 
1034 
1035     typedef list<CRef<CSubSource> > SUBSOURCES;
1036     const SUBSOURCES& subsources = src.GetSubtype();
1037 
1038     string key = colName;
1039     int count = 0;
1040     for (SUBSOURCES::const_iterator cit = subsources.begin();
1041             cit != subsources.end(); ++cit) {
1042 
1043         const CSubSource& subsrc = **cit;
1044         if (subsrc.GetSubtype() != subtype) {
1045             continue;
1046         }
1047 
1048         if (count) {
1049             key = colName + "#" + NStr::IntToString(count+1);
1050         }
1051         ++count;
1052 
1053         string value;
1054         if (subsrc.IsSetName()) {
1055             value = subsrc.GetName();
1056         }
1057         if (value.empty()  &&  CSubSource::NeedsNoText(subsrc.GetSubtype())) {
1058             value = "true";
1059         }
1060         xPrepareTableColumn(key, key, "");
1061         xAppendColumnValue(key, value);
1062     }
1063     return true;
1064 }
1065 
1066 
1067 //  ----------------------------------------------------------------------------
xGatherOrgModFeat(const CBioSource & src,const string & colName,ILineErrorListener *)1068 bool CSrcWriter::xGatherOrgModFeat(
1069         const CBioSource& src,
1070         const string& colName,
1071         ILineErrorListener*)
1072 //  ----------------------------------------------------------------------------
1073 {
1074     if ( !src.IsSetOrgMod() ) {
1075         return true;
1076     }
1077 
1078     COrgMod::TSubtype subtype = COrgMod::GetSubtypeValue(colName,COrgMod::eVocabulary_raw);
1079 
1080     if ( xIsOrgmodTypeSuppressed(subtype) ) {
1081         return true;
1082     }
1083 
1084     typedef list<CRef<COrgMod> > ORGMODS;
1085     const ORGMODS& orgmods = src.GetOrgname().GetMod();
1086 
1087     string key = colName;
1088     int count = 0;
1089     for (ORGMODS::const_iterator cit = orgmods.begin();
1090             cit != orgmods.end(); ++cit) {
1091         const COrgMod& orgmod = **cit;
1092 
1093         if (orgmod.GetSubtype() != subtype) {
1094             continue;
1095         }
1096 
1097         if (count) {
1098             key = colName + "#" + NStr::IntToString(count+1);
1099         }
1100         ++count;
1101 
1102         string value = orgmod.GetSubname();
1103         xPrepareTableColumn(key, key, "");
1104         xAppendColumnValue(key, value);
1105     }
1106     return true;
1107 }
1108 
1109 
1110 //  ----------------------------------------------------------------------------
xGatherDb(const CBioSource & src,const string & colName,ILineErrorListener *)1111 bool CSrcWriter::xGatherDb(
1112         const CBioSource& src,
1113         const string& colName,
1114         ILineErrorListener*)
1115 //  ----------------------------------------------------------------------------
1116 {
1117     static const string displayName = "db";
1118     static const string defaultValue;
1119 
1120     if (!src.IsSetOrg()  ||  !src.GetOrg().IsSetDb()) {
1121         return true;
1122     }
1123 
1124     typedef vector< CRef< CDbtag > > DBTAGS;
1125     const DBTAGS& tags = src.GetOrg().GetDb();
1126     for (DBTAGS::const_iterator cit = tags.begin(); cit != tags.end(); ++cit) {
1127         const CDbtag& tag = **cit;
1128         if (!tag.IsSetDb()  ||  tag.GetDb().empty()  ||  !tag.IsSetTag()) {
1129             continue;
1130         }
1131         const CObject_id& objid = tag.GetTag();
1132         string dbtagStr;
1133         switch (objid.Which()) {
1134             default:
1135                 break;
1136             case CObject_id::e_Str:
1137                 if (objid.GetStr().empty()) {
1138                     continue;
1139                 }
1140                 dbtagStr = objid.GetStr();
1141                 break;
1142             case CObject_id::e_Id:
1143                 dbtagStr = NStr::IntToString(objid.GetId());
1144                 break;
1145         }
1146         string curColName = colName;
1147         string curDisplayName = displayName;
1148         curColName += tag.GetDb();
1149         curDisplayName += tag.GetDb();
1150         xPrepareTableColumn(curColName, curDisplayName, "");
1151         xAppendColumnValue(curColName, dbtagStr);
1152     }
1153     return true;
1154 }
1155 
1156 
1157 //  ----------------------------------------------------------------------------
xGatherTaxonId(const CBioSource & src,const string & colName,ILineErrorListener *)1158 bool CSrcWriter::xGatherTaxonId(
1159         const CBioSource& src,
1160         const string& colName,
1161         ILineErrorListener*)
1162 //  ----------------------------------------------------------------------------
1163 {
1164     static const string displayName = "taxid";
1165     static const string defaultValue;
1166 
1167     if (!src.IsSetOrg()  ||  !src.GetOrg().IsSetDb()) {
1168         return true;
1169     }
1170 
1171     typedef vector< CRef< CDbtag > > DBTAGS;
1172     const DBTAGS& tags = src.GetOrg().GetDb();
1173     string taxonIdStr;
1174     for (DBTAGS::const_iterator cit = tags.begin(); cit != tags.end(); ++cit) {
1175         const CDbtag& tag = **cit;
1176         if (!tag.IsSetDb()  ||  tag.GetDb() != "taxon") {
1177             continue;
1178         }
1179         const CObject_id& objid = tag.GetTag();
1180         switch (objid.Which()) {
1181             default:
1182                 return false;
1183             case CObject_id::e_Str:
1184                 if (objid.GetStr().empty()) {
1185                     continue;
1186                 }
1187                 taxonIdStr = objid.GetStr();
1188                 break;
1189             case CObject_id::e_Id:
1190                 taxonIdStr = NStr::IntToString(objid.GetId());
1191                 break;
1192         }
1193         break;
1194     }
1195     string curDisplayName = displayName;
1196     xPrepareTableColumn(colName, displayName, "");
1197     xAppendColumnValue(colName, taxonIdStr);
1198     return true;
1199 }
1200 
1201 
1202 //  ----------------------------------------------------------------------------
xGatherPcrPrimers(const CBioSource & src,const string & colName,ILineErrorListener *)1203 bool CSrcWriter::xGatherPcrPrimers(
1204         const CBioSource& src,
1205         const string& colName,
1206         ILineErrorListener*)
1207 //  ----------------------------------------------------------------------------
1208 {
1209     const string pcrPrimersFwdNames = "pcr-primers.names.fwd";
1210     const string pcrPrimersFwdSequences = "pcr-primers.sequences.fwd";
1211     const string pcrPrimersRevNames = "pcr-primers.names.reverse";
1212     const string pcrPrimersRevSequences = "pcr-primers.sequences.reverse";
1213 
1214     unsigned int columnSetCounter = 0;
1215 
1216     if (!src.IsSetPcr_primers()) {
1217         return true;
1218     }
1219     string fwdName, fwdSequence, revName, revSequence;
1220     const CPCRReactionSet& pcrset = src.GetPcr_primers();
1221 
1222     typedef list<CRef<CPCRReaction> > REACTIONS;
1223     const REACTIONS& reactions = pcrset.Get();
1224     for (REACTIONS::const_iterator cit = reactions.begin();
1225             cit != reactions.end(); ++cit) {
1226         const CPCRReaction& reaction = **cit;
1227         if (reaction.IsSetForward()) {
1228             if (fwdName.empty()) {
1229                 fwdName += ";";
1230                 fwdSequence += ";";
1231             }
1232             fwdName += CSrcWriter::xPrimerSetNames(reaction.GetForward());
1233             fwdSequence += CSrcWriter::xPrimerSetSequences(reaction.GetForward());
1234         }
1235         if (reaction.IsSetReverse()) {
1236             if (revName.empty()) {
1237                 revName += ";";
1238                 revSequence += ";";
1239             }
1240             revName += CSrcWriter::xPrimerSetNames(reaction.GetReverse());
1241             revSequence += CSrcWriter::xPrimerSetSequences(reaction.GetReverse());
1242         }
1243     }
1244     string keyPcrPrimersFwdNames = pcrPrimersFwdNames;
1245     string keyPcrPrimersFwdSequences = pcrPrimersFwdSequences;
1246     string keyPcrPrimersRevNames = pcrPrimersRevNames;
1247     string keyPcrPrimersRevSequences = pcrPrimersRevSequences;
1248     if (columnSetCounter > 0) {
1249         keyPcrPrimersFwdNames += "#" + NStr::IntToString(columnSetCounter);
1250         keyPcrPrimersFwdSequences += "#" + NStr::IntToString(columnSetCounter);
1251         keyPcrPrimersRevNames += "#" + NStr::IntToString(columnSetCounter);
1252         keyPcrPrimersRevSequences += "#" + NStr::IntToString(columnSetCounter);
1253     }
1254     xPrepareTableColumn(
1255             keyPcrPrimersFwdNames, keyPcrPrimersFwdNames, "");
1256     xAppendColumnValue(keyPcrPrimersFwdNames, fwdName);
1257     xPrepareTableColumn(
1258             keyPcrPrimersFwdSequences, keyPcrPrimersFwdSequences, "");
1259     xAppendColumnValue(keyPcrPrimersFwdSequences, fwdSequence);
1260 
1261     xPrepareTableColumn(
1262             keyPcrPrimersRevNames, keyPcrPrimersRevNames, "");
1263     xAppendColumnValue(keyPcrPrimersRevNames, revName);
1264     xPrepareTableColumn(
1265             keyPcrPrimersRevSequences, keyPcrPrimersRevSequences, "");
1266     xAppendColumnValue(keyPcrPrimersRevSequences, revSequence);
1267     return true;
1268 }
1269 
1270 
1271 //  ----------------------------------------------------------------------------
xPrimerSetNames(const CPCRPrimerSet & pset)1272 string CSrcWriter::xPrimerSetNames(const CPCRPrimerSet& pset)
1273     //  ----------------------------------------------------------------------------
1274 {
1275     string names;
1276     typedef list<CRef<CPCRPrimer> > PRIMERS;
1277     const PRIMERS& primers = pset.Get();
1278     for (PRIMERS::const_iterator cit = primers.begin();
1279             cit != primers.end(); ++cit) {
1280         const CPCRPrimer& primer = **cit;
1281         names += ",";
1282         if (primer.IsSetName()) {
1283             names += primer.GetName();
1284         }
1285     }
1286     return names.substr(1);
1287 }
1288 
1289 
1290 //  ----------------------------------------------------------------------------
xPrimerSetSequences(const CPCRPrimerSet & pset)1291 string CSrcWriter::xPrimerSetSequences(const CPCRPrimerSet& pset)
1292     //  ----------------------------------------------------------------------------
1293 {
1294     string sequences;
1295     typedef list<CRef<CPCRPrimer> > PRIMERS;
1296     const PRIMERS& primers = pset.Get();
1297     for (PRIMERS::const_iterator cit = primers.begin();
1298             cit != primers.end(); ++cit) {
1299         const CPCRPrimer& primer = **cit;
1300         sequences += ",";
1301         if (primer.IsSetSeq()) {
1302             sequences += primer.GetSeq();
1303         }
1304     }
1305     return sequences.substr(1);
1306 }
1307 
1308 
1309 //  ----------------------------------------------------------------------------
xFormatTabDelimited(const FIELDS & colStubs,CNcbiOstream & out)1310 bool CSrcWriter::xFormatTabDelimited(
1311         const FIELDS& colStubs,
1312         CNcbiOstream& out)
1313 //  ----------------------------------------------------------------------------
1314 {
1315     // Print columns in the order given in colStubs
1316     map<string,NAMELIST >  ColstubToColnames;
1317     typedef map<string,NAMELIST > COLSTUBNAMESMAP;
1318 
1319 
1320     for (COLUMNMAP::const_iterator cit=mColnameToIndex.begin();
1321             cit != mColnameToIndex.end();  ++cit) {
1322         string colName = cit->first;
1323         string colStub = xGetColStub(colName);
1324         if (ColstubToColnames.find(colStub) == ColstubToColnames.end()) {
1325             ColstubToColnames[colStub] = NAMELIST(1,colName);
1326         } else {
1327             ColstubToColnames[colStub].push_back(colName);
1328         }
1329     }
1330 
1331 
1332     NAMELIST colNames;
1333     for (FIELDS::const_iterator cit = colStubs.begin();
1334             cit != colStubs.end();  ++cit) {
1335         COLSTUBNAMESMAP::iterator mapIter = ColstubToColnames.find(*cit);
1336         if (mapIter != ColstubToColnames.end()) {
1337             colNames.insert(colNames.end(), mapIter->second.begin(),
1338                     mapIter->second.end());
1339         }
1340     }
1341 
1342     // Write the output table
1343     for (NAMELIST::const_iterator cit = colNames.begin();
1344             cit != colNames.end();  ++cit) {
1345         const CSeqTable_column& column = mSrcTable->GetColumn(*cit);
1346         string displayName = column.GetHeader().GetTitle();
1347         out << displayName << CSrcWriter::mDelimiter;
1348     }
1349     out << '\n';
1350 
1351 
1352     unsigned int numRows = mSrcTable->GetNum_rows();
1353     for (unsigned int u=0; u < numRows; ++u) {
1354         for (NAMELIST::const_iterator cit = colNames.begin();
1355                 cit != colNames.end();  ++cit) {
1356             const CSeqTable_column& column = mSrcTable->GetColumn(*cit);
1357             const string* pValue = column.GetStringPtr(u);
1358             bool needsQuotes = xValueNeedsQuoting(*pValue);
1359             if (needsQuotes) {
1360                 out << "\"";
1361             }
1362             out << xDequotedValue(*pValue) << CSrcWriter::mDelimiter;
1363             if (needsQuotes) {
1364                 out << "\"";
1365             }
1366         }
1367         out << '\n';
1368     }
1369     return true;
1370 }
1371 
1372 
1373 //  ----------------------------------------------------------------------------
string CSrcWriter::xGetColStub(
        const string& colName)
//  Maps a column name to its stub: any name containing "pcr-primers" maps to
//  "pcr-primers"; a name of the form stub#number maps to the part before the
//  first '#'; any other name is its own stub.
//  ----------------------------------------------------------------------------
{
    // pcr-primers columns all share a single stub
    const bool isPcrPrimerColumn = (NStr::Find(colName, "pcr-primers") != NPOS);
    if (isPcrPrimerColumn) {
        return "pcr-primers";
    }

    // strip a "#N" suffix, if present
    const size_t hashPos = NStr::Find(colName, "#");
    return (hashPos != NPOS) ? colName.substr(0, hashPos) : colName;
}
1390 
1391 
1392 //  ----------------------------------------------------------------------------
xAppendColumnValue(const string & colName,const string & colValue)1393 void CSrcWriter::xAppendColumnValue(
1394         const string& colName,
1395         const string& colValue)
1396 //  ----------------------------------------------------------------------------
1397 {
1398     size_t index = mColnameToIndex[colName];
1399     CSeqTable_column& column = *mSrcTable->SetColumns().at(index);
1400     column.SetData().SetString().push_back(colValue);
1401 }
1402 
1403 
1404 //  ----------------------------------------------------------------------------
ValidateFields(const FIELDS & fields,ILineErrorListener * pEC)1405 bool CSrcWriter::ValidateFields(
1406         const FIELDS& fields,
1407         ILineErrorListener* pEC)
1408 //  ----------------------------------------------------------------------------
1409 {
1410     for (FIELDS::const_iterator cit = fields.begin(); cit != fields.end(); ++cit) {
1411         string field = *cit;
1412         NAMEMAP::const_iterator mapIter = sFieldnameToColname.find(xCompressFieldName(field));
1413         if (mapIter == sFieldnameToColname.end()) {
1414             CSrcError* pE = CSrcError::Create(
1415                     ncbi::eDiag_Error,
1416                     "Field name \"" + field + "\" not recognized.");
1417             pEC->PutError(*pE);
1418             delete pE;
1419             return false;
1420         }
1421     }
1422     return true;
1423 }
1424 
1425 
1426 //  ----------------------------------------------------------------------------
xValueNeedsQuoting(const string & value)1427 bool CSrcWriter::xValueNeedsQuoting(
1428         const string& value)
1429 //  ----------------------------------------------------------------------------
1430 {
1431     return (value.find(mDelimiter) != string::npos);
1432 }
1433 
1434 
1435 //  ----------------------------------------------------------------------------
string CSrcWriter::xDequotedValue(
        const string& value)
//  For lack of a better idea, replace all occurrences of "\"" with "\'\'"
//  (each double quote becomes two single quotes).
//  -----------------------------------------------------------------------------
{
    const string dequoted = NStr::Replace(value, "\"", "\'\'");
    return dequoted;
}
1443 
1444 
1445 //  -----------------------------------------------------------------------------
CSrcError(EDiagSev severity,const string & message)1446 CSrcError::CSrcError(
1447         EDiagSev severity,
1448         const string& message):
1449     //  -----------------------------------------------------------------------------
1450     CLineError(ILineError::eProblem_Unset, severity, "", 0,
1451             "", "", "", message, CLineError::TVecOfLines())
1452 {
1453 }
1454 
1455 
1456 //  -----------------------------------------------------------------------------
Create(EDiagSev severity,const string & message)1457 CSrcError* CSrcError::Create(
1458         EDiagSev severity,
1459         const string& message)
1460 //  -----------------------------------------------------------------------------
1461 {
1462     return new CSrcError(severity, message);
1463 }
1464 
1465 END_NCBI_SCOPE
1466