1 /*  $Id: write_util.cpp 637282 2021-09-09 19:27:15Z ivanov $
2  * ===========================================================================
3  *
4  *                            PUBLIC DOMAIN NOTICE
5  *               National Center for Biotechnology Information
6  *
7  *  This software/database is a "United States Government Work" under the
8  *  terms of the United States Copyright Act.  It was written as part of
9  *  the author's official duties as a United States Government employee and
10  *  thus cannot be copyrighted.  This software/database is freely available
11  *  to the public for use. The National Library of Medicine and the U.S.
12  *  Government have not placed any restriction on its use or reproduction.
13  *
14  *  Although all reasonable efforts have been taken to ensure the accuracy
15  *  and reliability of the software and data, the NLM and the U.S.
16  *  Government do not and cannot warrant the performance or results that
17  *  may be obtained by using this software or data. The NLM and the U.S.
18  *  Government disclaim all warranties, express or implied, including
19  *  warranties of performance, merchantability or fitness for any particular
20  *  purpose.
21  *
22  *  Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Authors:  Frank Ludwig
27  *
28  * File Description:  Write gff file
29  *
30  */
31 
32 #include <ncbi_pch.hpp>
33 #include <objmgr/seqdesc_ci.hpp>
34 #include <objects/seq/Seq_annot.hpp>
35 #include <objects/seq/Annot_descr.hpp>
36 #include <objects/seq/seqport_util.hpp>
37 #include <objects/general/User_object.hpp>
38 #include <objects/general/Object_id.hpp>
39 #include <objects/general/Dbtag.hpp>
40 #include <objects/seqfeat/BioSource.hpp>
41 #include <objects/seqfeat/OrgName.hpp>
42 #include <objects/seqfeat/OrgMod.hpp>
43 #include <objects/seqfeat/SubSource.hpp>
44 #include <objtools/writers/write_util.hpp>
45 #include <objtools/writers/feature_context.hpp>
46 #include <objmgr/util/sequence.hpp>
47 
48 #include <objtools/writers/writer_exception.hpp>
49 #include <objtools/writers/genbank_id_resolve.hpp>
50 
51 BEGIN_NCBI_SCOPE
52 USING_SCOPE(objects);
53 
54 //  ----------------------------------------------------------------------------
GetDescriptor(const CSeq_annot & annot,const string & strType)55 CRef<CUser_object> CWriteUtil::GetDescriptor(
56     const CSeq_annot& annot,
57     const string& strType )
58 //  ----------------------------------------------------------------------------
59 {
60     CRef< CUser_object > pUser;
61     if (!annot.IsSetDesc()) {
62         return pUser;
63     }
64 
65     const list<CRef<CAnnotdesc> > descriptors = annot.GetDesc().Get();
66     list<CRef<CAnnotdesc> >::const_iterator it;
67     for (it = descriptors.begin(); it != descriptors.end(); ++it) {
68         if (!(*it)->IsUser()) {
69             continue;
70         }
71         const CUser_object& user = (*it)->GetUser();
72         if (user.GetType().GetStr() == strType) {
73             pUser.Reset(new CUser_object);
74             pUser->Assign(user);
75             return pUser;
76         }
77     }
78     return pUser;
79 }
80 
81 //  ----------------------------------------------------------------------------
GetGenomeString(const CBioSource & bs,string & genome_str)82 bool CWriteUtil::GetGenomeString(
83     const CBioSource& bs,
84     string& genome_str )
85 //  ----------------------------------------------------------------------------
86 {
87 #define EMIT(str) { genome_str = str; return true; }
88 
89     if (!bs.IsSetGenome()) {
90         return false;
91     }
92     switch (bs.GetGenome()) {
93         default:
94             return false;
95         case CBioSource::eGenome_apicoplast: EMIT("apicoplast");
96         case CBioSource::eGenome_chloroplast: EMIT("chloroplast");
97         case CBioSource::eGenome_chromatophore: EMIT("chromatophore");
98         case CBioSource::eGenome_chromoplast: EMIT("chromoplast");
99         case CBioSource::eGenome_chromosome: EMIT("chromosome");
100         case CBioSource::eGenome_cyanelle: EMIT("cyanelle");
101         case CBioSource::eGenome_endogenous_virus: EMIT("endogenous_virus");
102         case CBioSource::eGenome_extrachrom: EMIT("extrachrom");
103         case CBioSource::eGenome_genomic: EMIT("genomic");
104         case CBioSource::eGenome_hydrogenosome: EMIT("hydrogenosome");
105         case CBioSource::eGenome_insertion_seq: EMIT("insertion_seq");
106         case CBioSource::eGenome_kinetoplast: EMIT("kinetoplast");
107         case CBioSource::eGenome_leucoplast: EMIT("leucoplast");
108         case CBioSource::eGenome_macronuclear: EMIT("macronuclear");
109         case CBioSource::eGenome_mitochondrion: EMIT("mitochondrion");
110         case CBioSource::eGenome_nucleomorph: EMIT("nucleomorph");
111         case CBioSource::eGenome_plasmid: EMIT("plasmid");
112         case CBioSource::eGenome_plastid: EMIT("plastid");
113         case CBioSource::eGenome_proplastid: EMIT("proplastid");
114         case CBioSource::eGenome_proviral: EMIT("proviral");
115         case CBioSource::eGenome_transposon: EMIT("transposon");
116         case CBioSource::eGenome_unknown: EMIT("unknown");
117         case CBioSource::eGenome_virion: EMIT("virion");
118     }
119 }
120 #undef EMIT
121 
122 //  ----------------------------------------------------------------------------
GetIdType(const CSeq_id & seqId,string & idType)123 bool CWriteUtil::GetIdType(
124     const CSeq_id& seqId,
125     string& idType )
126 //  ----------------------------------------------------------------------------
127 {
128 #define EMIT(str) { idType = str; return true; }
129     switch(seqId.Which()) {
130     default:
131         idType = CSeq_id::SelectionName(seqId.Which());
132         NStr::ToUpper(idType);
133         break;
134 
135     case CSeq_id::e_Local: EMIT("Local");
136 
137     case CSeq_id::e_Gibbsq:
138     case CSeq_id::e_Gibbmt:
139     case CSeq_id::e_Giim:
140     case CSeq_id::e_Gi: EMIT("GenInfo");
141 
142     case CSeq_id::e_Genbank: EMIT("Genbank");
143     case CSeq_id::e_Swissprot: EMIT("SwissProt");
144     case CSeq_id::e_Patent: EMIT("Patent");
145     case CSeq_id::e_Other: EMIT("RefSeq");
146     case CSeq_id::e_Ddbj: EMIT("DDBJ");
147     case CSeq_id::e_Embl: EMIT("EMBL");
148     case CSeq_id::e_Pir: EMIT("PIR");
149     case CSeq_id::e_Prf: EMIT("PRF");
150     case CSeq_id::e_Pdb: EMIT("PDB");
151     case CSeq_id::e_Tpg: EMIT("tpg");
152     case CSeq_id::e_Tpe: EMIT("tpe");
153     case CSeq_id::e_Tpd: EMIT("tpd");
154     case CSeq_id::e_Gpipe: EMIT("gpipe");
155     case CSeq_id::e_Named_annot_track: EMIT("NADB");
156     case CSeq_id::e_General:
157         EMIT(seqId.GetGeneral().GetDb());
158     }
159 #undef EMIT
160     return true;
161 }
162 
163 //  ----------------------------------------------------------------------------
GetIdType(CBioseq_Handle bsh,string & idType)164 bool CWriteUtil::GetIdType(
165     CBioseq_Handle bsh,
166     string& idType )
167 //  ----------------------------------------------------------------------------
168 {
169     if (!bsh) {
170         return false;
171     }
172     CSeq_id_Handle best_idh;
173     try {
174         best_idh = sequence::GetId(bsh, sequence::eGetId_Best);
175         if ( !best_idh ) {
176             best_idh = sequence::GetId(bsh, sequence::eGetId_Canonical);
177         }
178     }
179     catch(...) {
180         return false;
181     }
182     return GetIdType(*best_idh.GetSeqId(), idType);
183 }
184 
185 //  ----------------------------------------------------------------------------
GetOrgModSubType(const COrgMod & mod,string & subtype,string & subname)186 bool CWriteUtil::GetOrgModSubType(
187     const COrgMod& mod,
188     string& subtype,
189     string& subname)
190 //  ----------------------------------------------------------------------------
191 {
192     if (!mod.IsSetSubtype() || !mod.IsSetSubname()) {
193         return false;
194     }
195     subtype = COrgMod::GetSubtypeName(mod.GetSubtype());
196     subname = mod.GetSubname();
197     return true;
198 }
199 
200 //  ----------------------------------------------------------------------------
GetSubSourceSubType(const CSubSource & sub,string & subtype,string & subname)201 bool CWriteUtil::GetSubSourceSubType(
202     const CSubSource& sub,
203     string& subtype,
204     string& subname)
205 //  ----------------------------------------------------------------------------
206 {
207 #define EMIT(str) { subname = str; return true; }
208     if (!sub.IsSetSubtype() || !sub.IsSetName()) {
209         return false;
210     }
211     subtype = CSubSource::GetSubtypeName(sub.GetSubtype());
212 
213     switch (sub.GetSubtype()) {
214         default:
215             if (sub.GetName().empty()) {
216                 EMIT("indeterminate");
217             }
218             EMIT(sub.GetName());
219         case CSubSource::eSubtype_environmental_sample:
220         case CSubSource::eSubtype_germline:
221         case CSubSource::eSubtype_transgenic:
222         case CSubSource::eSubtype_rearranged:
223         case CSubSource::eSubtype_metagenomic:
224             EMIT("true");
225     }
226     return true;
227 #undef EMIT
228 }
229 
230 //  ----------------------------------------------------------------------------
GetAaName(const CCode_break & cb,string & aaName)231 bool CWriteUtil::GetAaName(
232     const CCode_break& cb,
233     string& aaName )
234 //  ----------------------------------------------------------------------------
235 {
236     static const char* AANames[] = {
237         "---", "Ala", "Asx", "Cys", "Asp", "Glu", "Phe", "Gly", "His", "Ile",
238         "Lys", "Leu", "Met", "Asn", "Pro", "Gln", "Arg", "Ser", "Thr", "Val",
239         "Trp", "Other", "Tyr", "Glx", "Sec", "TERM", "Pyl"
240     };
241     static const char* other = "OTHER";
242 
243     unsigned char aa(0);
244     switch (cb.GetAa().Which()) {
245         case CCode_break::C_Aa::e_Ncbieaa:
246             aa = cb.GetAa().GetNcbieaa();
247             aa = CSeqportUtil::GetMapToIndex(
248                 CSeq_data::e_Ncbieaa, CSeq_data::e_Ncbistdaa, aa);
249             break;
250         case CCode_break::C_Aa::e_Ncbi8aa:
251             aa = cb.GetAa().GetNcbi8aa();
252             break;
253         case CCode_break::C_Aa::e_Ncbistdaa:
254             aa = cb.GetAa().GetNcbistdaa();
255             break;
256         default:
257             return false;
258     }
259     aaName = ((aa < sizeof(AANames)/sizeof(*AANames)) ? AANames[aa] : other);
260     return true;
261 }
262 
263 //  ----------------------------------------------------------------------------
GetCodeBreak(const CCode_break & cb,string & cbString)264 bool CWriteUtil::GetCodeBreak(
265     const CCode_break& cb,
266     string& cbString )
267 //  ----------------------------------------------------------------------------
268 {
269     string cb_str = ("(pos:");
270     if ( cb.IsSetLoc() ) {
271         const CCode_break::TLoc& loc = cb.GetLoc();
272         switch( loc.Which() ) {
273             default: {
274                 cb_str += NStr::IntToString( loc.GetStart(eExtreme_Positional)+1 );
275                 cb_str += "..";
276                 cb_str += NStr::IntToString( loc.GetStop(eExtreme_Positional)+1 );
277                 break;
278             }
279             case CSeq_loc::e_Int: {
280                 const CSeq_interval& intv = loc.GetInt();
281                 string intv_str;
282                 intv_str += NStr::IntToString( intv.GetFrom()+1 );
283                 intv_str += "..";
284                 intv_str += NStr::IntToString( intv.GetTo()+1 );
285                 if ( intv.IsSetStrand()  &&  intv.GetStrand() == eNa_strand_minus ) {
286                     intv_str = "complement(" + intv_str + ")";
287                 }
288                 cb_str += intv_str;
289                 break;
290             }
291         }
292     }
293     cb_str += ",aa:";
294 
295     string aaName;
296     if (!CWriteUtil::GetAaName(cb, aaName)) {
297         return false;
298     }
299     cb_str += aaName + ")";
300     cbString = cb_str;
301     return true;
302 }
303 
304 //  ----------------------------------------------------------------------------
GetTrnaCodons(const CTrna_ext & trna,string & codonStr)305 bool CWriteUtil::GetTrnaCodons(
306     const CTrna_ext& trna,
307     string& codonStr )
308 //  ----------------------------------------------------------------------------
309 {
310     if (!trna.IsSetCodon()) {
311         return false;
312     }
313     const list<int>& values = trna.GetCodon();
314     if (values.empty()) {
315         return false;
316     }
317     list<int>::const_iterator cit = values.begin();
318     string codons = NStr::IntToString(*cit);
319     for (cit++; cit != values.end(); ++cit) {
320         codons += ",";
321         codons += NStr::IntToString(*cit);
322     }
323     codonStr = codons;
324     return true;
325 }
326 
327 //  ----------------------------------------------------------------------------
GetGeneRefGene(const CGene_ref & generef,string & gene)328 bool CWriteUtil::GetGeneRefGene(
329     const CGene_ref& generef,
330     string& gene )
331 //  ----------------------------------------------------------------------------
332 {
333 #define EMIT(str) { gene = str; return true; }
334    if (generef.IsSetLocus()) {
335         EMIT(generef.GetLocus());
336     }
337     if (generef.IsSetSyn()  && generef.GetSyn().size() > 0) {
338         EMIT(generef.GetSyn().front());
339     }
340     if (generef.IsSetDesc()) {
341         EMIT(generef.GetDesc());
342     }
343     return false;
344 #undef EMIT
345 }
346 
347 //  ----------------------------------------------------------------------------
GetTrnaProductName(const CTrna_ext & trna,string & name)348 bool CWriteUtil::GetTrnaProductName(
349     const CTrna_ext& trna,
350     string& name )
351 //  ----------------------------------------------------------------------------
352 {
353     static const string sTrnaList[] = {
354         "tRNA-Gap", "tRNA-Ala", "tRNA-Asx", "tRNA-Cys", "tRNA-Asp", "tRNA-Glu",
355         "tRNA-Phe", "tRNA-Gly", "tRNA-His", "tRNA-Ile", "tRNA-Xle", "tRNA-Lys",
356         "tRNA-Leu", "tRNA-Met", "tRNA-Asn", "tRNA-Pyl", "tRNA-Pro", "tRNA-Gln",
357         "tRNA-Arg", "tRNA-Ser", "tRNA-Thr", "tRNA-Sec", "tRNA-Val", "tRNA-Trp",
358         "tRNA-OTHER", "tRNA-Tyr", "tRNA-Glx", "tRNA-TERM"
359     };
360     static int AACOUNT = sizeof(sTrnaList)/sizeof(string);
361 
362     if (!trna.IsSetAa()  ||  !trna.GetAa().IsNcbieaa()) {
363         return false;
364     }
365     int aa = trna.GetAa().GetNcbieaa();
366     (aa == '*') ? (aa = 25) : (aa -= 64);
367     name = ((0 < aa  &&  aa < AACOUNT) ? sTrnaList[aa] : "");
368     return true;
369 }
370 
371 //  ----------------------------------------------------------------------------
GetTrnaAntiCodon(const CTrna_ext & trna,string & acStr)372 bool CWriteUtil::GetTrnaAntiCodon(
373     const CTrna_ext& trna,
374     string& acStr )
375 //  ----------------------------------------------------------------------------
376 {
377     if (!trna.IsSetAnticodon()) {
378         return false;
379     }
380     const CSeq_loc& loc = trna.GetAnticodon();
381     string anticodon;
382     switch( loc.Which() ) {
383         default: {
384             anticodon += NStr::IntToString( loc.GetStart(eExtreme_Positional)+1 );
385             anticodon += "..";
386             anticodon += NStr::IntToString( loc.GetStop(eExtreme_Positional)+1 );
387             break;
388         }
389         case CSeq_loc::e_Int: {
390             const CSeq_interval& intv = loc.GetInt();
391             anticodon += NStr::IntToString( intv.GetFrom()+1 );
392             anticodon += "..";
393             anticodon += NStr::IntToString( intv.GetTo()+1 );
394             if ( intv.IsSetStrand()  &&  intv.GetStrand() == eNa_strand_minus ) {
395                 anticodon = "complement(" + anticodon + ")";
396             }
397             break;
398         }
399     }
400     acStr = string("(pos:") + anticodon + ")";
401     return true;
402 }
403 
404 //  ----------------------------------------------------------------------------
GetDbTag(const CDbtag & dbtag,string & dbTagStr)405 bool CWriteUtil::GetDbTag(
406     const CDbtag& dbtag,
407     string& dbTagStr )
408 //
409 //  Note: Different from CDbtag::GetLabel()
410 //  ----------------------------------------------------------------------------
411 {
412     string str;
413     if ( dbtag.IsSetDb() ) {
414         str += dbtag.GetDb();
415     }
416     else {
417         str += "NoDB";
418     }
419     if ( dbtag.IsSetTag() ) {
420         if (!str.empty()) {
421             str += ":";
422         }
423         if (dbtag.GetTag().IsId() ) {
424             str += NStr::UIntToString( dbtag.GetTag().GetId() );
425         }
426         if ( dbtag.GetTag().IsStr() ) {
427             str += dbtag.GetTag().GetStr();
428         }
429     }
430     if (str.empty()) {
431         return false;
432     }
433     dbTagStr = str;
434     return true;
435 }
436 
437 //  ----------------------------------------------------------------------------
GetBiomol(CBioseq_Handle bsh,string & mol_str)438 bool CWriteUtil::GetBiomol(
439     CBioseq_Handle bsh,
440     string& mol_str)
441 //  ----------------------------------------------------------------------------
442 {
443 #define EMIT(str) { mol_str = str; return true; }
444     CSeqdesc_CI md(bsh.GetParentEntry(), CSeqdesc::e_Molinfo, 0);
445     if (!md) {
446         return false;
447     }
448     const CMolInfo& molinfo = md->GetMolinfo();
449     if (!molinfo.IsSetBiomol()) {
450         return false;
451     }
452 
453     int inst = bsh.GetInst_Mol();
454     int mol = molinfo.GetBiomol();
455 
456     switch( mol ) {
457         default:
458             break;
459         case CMolInfo::eBiomol_genomic: {
460             switch (inst) {
461                 default:
462                     EMIT("genomic");
463                 case CSeq_inst::eMol_dna:
464                     EMIT("genomic DNA");
465                 case CSeq_inst::eMol_rna:
466                     EMIT("genomic RNA");
467             }
468         }
469         case CMolInfo::eBiomol_mRNA:
470             EMIT("mRNA");
471         case CMolInfo::eBiomol_rRNA:
472             EMIT("rRNA");
473         case CMolInfo::eBiomol_tRNA:
474             EMIT("tRNA");
475         case CMolInfo::eBiomol_pre_RNA:
476         case CMolInfo::eBiomol_snRNA:
477         case CMolInfo::eBiomol_scRNA:
478         case CMolInfo::eBiomol_snoRNA:
479         case CMolInfo::eBiomol_ncRNA:
480         case CMolInfo::eBiomol_tmRNA:
481         case CMolInfo::eBiomol_transcribed_RNA:
482             EMIT("transcribed RNA");
483         case CMolInfo::eBiomol_other_genetic:
484         case CMolInfo::eBiomol_other: {
485             switch (inst) {
486                 default:
487                     EMIT("other");
488                 case CSeq_inst::eMol_dna:
489                     EMIT("other DNA");
490                 case CSeq_inst::eMol_rna:
491                     EMIT("other RNA");
492             }
493         }
494         case CMolInfo::eBiomol_cRNA:
495             EMIT("viral cRNA");
496 
497         case CMolInfo::eBiomol_genomic_mRNA:
498             EMIT("genomic RNA");
499     }
500     switch (inst) {
501         default:
502             EMIT("unassigned");
503         case CSeq_inst::eMol_dna:
504             EMIT("unassigned DNA");
505         case CSeq_inst::eMol_rna:
506             EMIT("unassigned RNA");
507     }
508     return false;
509 #undef EMIT
510 }
511 
512 //  ----------------------------------------------------------------------------
string CWriteUtil::UrlEncode(
    const string& raw)
//
//  Percent-encode the given string byte by byte. ASCII letters, digits and
//  the characters in kSafe (which includes the blank) are copied through
//  unchanged; every other byte value, including all values of 0x80 and above,
//  becomes "%XX" with two upper case hex digits.
//  ----------------------------------------------------------------------------
{
    static const char kHexDigits[] = "0123456789ABCDEF";
    static const string kSafe(" !$-.:@^_");

    string encoded;
    encoded.reserve(raw.size());
    for ( size_t i = 0;  i < raw.size();  ++i ) {
        // go through unsigned char so that high bytes are not negative
        const unsigned char ch = static_cast<unsigned char>( raw[i] );
        const bool isSafe =
            ('0' <= ch  &&  ch <= '9')  ||
            ('A' <= ch  &&  ch <= 'Z')  ||
            ('a' <= ch  &&  ch <= 'z')  ||
            (kSafe.find(static_cast<char>(ch)) != string::npos);
        if ( isSafe ) {
            encoded += static_cast<char>(ch);
        }
        else {
            encoded += '%';
            encoded += kHexDigits[ch >> 4];
            encoded += kHexDigits[ch & 0x0F];
        }
    }
    return encoded;
}
552 
553 //  ----------------------------------------------------------------------------
IsLocationOrdered(const CSeq_loc & loc)554 bool CWriteUtil::IsLocationOrdered(
555     const CSeq_loc& loc)
556 //  Look whether the given location contains any eNull intervals. If so, the
557 //  location is ordered, otherwise not.
558 //  ----------------------------------------------------------------------------
559 {
560     switch ( loc.Which() ) {
561     case CSeq_loc::e_Null:
562         return true;
563     case CSeq_loc::e_Mix: {
564             ITERATE (CSeq_loc_mix::Tdata, sub_loc, loc.GetMix().Get()) {
565                 if (IsLocationOrdered(**sub_loc)) {
566                     return true;
567                 }
568             }
569             return false;
570         }
571     default:
572         return false;
573     }
574 }
575 
576 //  ----------------------------------------------------------------------------
IsSequenceCircular(CBioseq_Handle bsh)577 bool CWriteUtil::IsSequenceCircular(
578     CBioseq_Handle bsh)
579 //  ----------------------------------------------------------------------------
580 {
581     if (!bsh  ||  !bsh.IsSetInst_Topology()
582               ||  bsh.GetInst_Topology() != CSeq_inst::eTopology_circular) {
583         return false;
584     }
585     return true;
586 }
587 
588 //  ----------------------------------------------------------------------------
bool CWriteUtil::NeedsQuoting(
    const string& str )
//
//  Decide whether the given string should be put in quotes. The empty string
//  always is. Otherwise the first occurrence of a double quote, blank,
//  semicolon, colon or equals sign decides: a double quote means no, any of
//  the others means yes. Without any such character the answer is no.
//  ----------------------------------------------------------------------------
{
    if (str.empty()) {
        return true;
    }
    const string::size_type firstHit = str.find_first_of("\" ;:=");
    if (firstHit == string::npos) {
        return false;
    }
    return str[firstHit] != '\"';
}
605 
606 //  ----------------------------------------------------------------------------
ChangeToPackedInt(CSeq_loc & loc)607 void CWriteUtil::ChangeToPackedInt(
608     CSeq_loc& loc)
609 //  Special mission:
610 //  Filter out eNull intervals before submitting the location to the "normal"
611 //  ChangeToPackedInt() method.
612 //  ----------------------------------------------------------------------------
613 {
614     switch ( loc.Which() ) {
615     case CSeq_loc::e_Null:
616         loc.SetPacked_int();
617         return;
618     case CSeq_loc::e_Mix: {
619             vector<CRef<CSeq_loc> > sub_locs;
620             sub_locs.reserve(loc.GetMix().Get().size());
621             ITERATE (CSeq_loc_mix::Tdata, orig_sub_loc, loc.GetMix().Get()) {
622                 if ((*orig_sub_loc)->Which() == CSeq_loc::e_Null) {
623                     continue;
624                 }
625                 CRef<CSeq_loc> new_sub_loc(new CSeq_loc);
626                 new_sub_loc->Assign(**orig_sub_loc);
627                 ChangeToPackedInt(*new_sub_loc);
628                 sub_locs.push_back(new_sub_loc);
629             }
630             loc.SetPacked_int();  // in case there are zero intervals
631             ITERATE (vector<CRef<CSeq_loc> >, sub_loc, sub_locs) {
632                 copy((*sub_loc)->GetPacked_int().Get().begin(),
633                      (*sub_loc)->GetPacked_int().Get().end(),
634                      back_inserter(loc.SetPacked_int().Set()));
635             }
636         }
637         return;
638     default:
639         loc.ChangeToPackedInt();
640         return;
641     }
642 }
643 
644 //  ----------------------------------------------------------------------------
GetBestId(CSeq_id_Handle idh,CScope & scope,string & best_id)645 bool CWriteUtil::GetBestId(
646     CSeq_id_Handle idh,
647     CScope& scope,
648     string& best_id)
649 //  ----------------------------------------------------------------------------
650 {
651     return CGenbankIdResolve::Get().GetBestId(idh, scope, best_id);
652 }
653 
654 //  ----------------------------------------------------------------------------
GetBestId(const CMappedFeat & mf,string & best_id)655 bool CWriteUtil::GetBestId(
656     const CMappedFeat& mf,
657     string& best_id)
658 //  ----------------------------------------------------------------------------
659 {
660     return CGenbankIdResolve::Get().GetBestId(mf, best_id);
661 }
662 
663 //  ----------------------------------------------------------------------------
GetQualifier(CMappedFeat mf,const string & key,string & value)664 bool CWriteUtil::GetQualifier(
665     CMappedFeat mf,
666     const string& key,
667     string& value)
668 //  ----------------------------------------------------------------------------
669 {
670     if (!mf.IsSetQual()) {
671         return false;
672     }
673     const vector<CRef<CGb_qual> >& quals = mf.GetQual();
674     vector<CRef<CGb_qual> >::const_iterator it = quals.begin();
675     for (; it != quals.end(); ++it) {
676         if (!(*it)->CanGetQual() || !(*it)->CanGetVal()) {
677             continue;
678         }
679         if ((*it)->GetQual() == key) {
680             value = (*it)->GetVal();
681             return true;
682         }
683     }
684     return false;
685 }
686 
687 //  ---------------------------------------------------------------------------
xAssignSequenceIsGenomicRecord()688 void CGffFeatureContext::xAssignSequenceIsGenomicRecord()
689 //  ---------------------------------------------------------------------------
690 {
691     m_bSequenceIsGenomicRecord = false;
692     if (!m_bsh) {
693         return;
694     }
695     if (!m_bsh || !m_bsh.IsSetDescr()) {
696         return;
697     }
698     const CSeq_descr& descr = m_bsh.GetDescr();
699     if (!descr.CanGet()) {
700         return;
701     }
702     const list< CRef< CSeqdesc > >& listDescr = descr.Get();
703     for (list< CRef< CSeqdesc > >::const_iterator cit = listDescr.begin();
704         cit != listDescr.end(); ++cit) {
705         const CSeqdesc& desc = **cit;
706         if (!desc.IsMolinfo()) {
707             continue;
708         }
709         const CMolInfo& molInfo = desc.GetMolinfo();
710         if (!molInfo.IsSetBiomol()) {
711             continue;
712         }
713         CMolInfo::TBiomol bioMol = molInfo.GetBiomol();
714         m_bSequenceIsGenomicRecord = (
715             (bioMol == CMolInfo::eBiomol_genomic) ||
716             (bioMol == CMolInfo::eBiomol_cRNA));
717         return;
718     }
719     return;
720 }
721 
722 //  ---------------------------------------------------------------------------
xAssignSequenceHasBioSource()723 void CGffFeatureContext::xAssignSequenceHasBioSource()
724 //  ---------------------------------------------------------------------------
725 {
726     m_bSequenceHasBioSource = false;
727     if (!m_bsh) {
728         return;
729     }
730     if (m_bsh.IsSetDescr()) {
731         const CSeq_descr& descr = m_bsh.GetDescr();
732         if (descr.CanGet()) {
733             const list< CRef< CSeqdesc > >& listDescr = descr.Get();
734             for (list< CRef< CSeqdesc > >::const_iterator cit = listDescr.begin();
735                     cit != listDescr.end(); ++cit) {
736                 const CSeqdesc& desc = **cit;
737                 if (desc.IsSource()) {
738                     m_bSequenceHasBioSource = true;
739                     return;
740                 }
741             }
742         }
743     }
744     CBioseq_set_Handle setH;
745     setH = m_bsh.GetParentBioseq_set();
746     if (setH  &&  setH.IsSetDescr()) {
747         const CSeq_descr& descr = setH.GetDescr();
748         if (descr.CanGet()) {
749             const list< CRef< CSeqdesc > >& listDescr = descr.Get();
750             for (list< CRef< CSeqdesc > >::const_iterator cit = listDescr.begin();
751                     cit != listDescr.end(); ++cit) {
752                 const CSeqdesc& desc = **cit;
753                 if (desc.IsSource()) {
754                     m_bSequenceHasBioSource = true;
755                     return;
756                 }
757             }
758         }
759     }
760     return;
761 }
762 
763 // ----------------------------------------------------------------------------
CMappedFeat CGffFeatureContext::FindBestGeneParent(const CMappedFeat& mf)
//
//  Find the gene that best goes with the given feature. The most recent
//  query and its answer are remembered in m_mfLastIn and m_mfLastOut, so
//  asking about the same feature again returns the remembered answer. Mobile
//  elements get an empty feature; mRNAs and all other features go through the
//  respective feature:: lookup on m_ft.
// ----------------------------------------------------------------------------
{
    if (mf == m_mfLastIn) {
        return m_mfLastOut;
    }
    m_mfLastIn = mf;

    switch (mf.GetFeatSubtype()) {
    case CSeqFeatData::eSubtype_mobile_element:
        m_mfLastOut = CMappedFeat();
        break;
    case CSeqFeatData::eSubtype_mRNA:
        m_mfLastOut = feature::GetBestGeneForMrna(mf, &m_ft);
        break;
    default:
        m_mfLastOut = feature::GetBestGeneForFeat(mf, &m_ft);
        break;
    }
    return m_mfLastOut;
}
786 
787 //  ----------------------------------------------------------------------------
GetUserObjectByType(const CUser_object & uo,const string & strType)788 CConstRef<CUser_object> CWriteUtil::GetUserObjectByType(
789     const CUser_object& uo,
790     const string& strType)
791 //  ----------------------------------------------------------------------------
792 {
793     if (uo.IsSetType() && uo.GetType().IsStr() &&
794         uo.GetType().GetStr() == strType) {
795         return CConstRef<CUser_object>(&uo);
796     }
797     const CUser_object::TData& fields = uo.GetData();
798     for (CUser_object::TData::const_iterator it = fields.begin();
799         it != fields.end();
800         ++it) {
801         const CUser_field& field = **it;
802         if (field.IsSetData()) {
803             const CUser_field::TData& data = field.GetData();
804             if (data.Which() == CUser_field::TData::e_Object) {
805                 CConstRef<CUser_object> recur = CWriteUtil::GetUserObjectByType(
806                     data.GetObject(), strType);
807                 if (recur) {
808                     return recur;
809                 }
810             }
811         }
812     }
813     return CConstRef<CUser_object>();
814 }
815 
816 //  ----------------------------------------------------------------------------
GetUserObjectByType(const list<CRef<CUser_object>> & uos,const string & strType)817 CConstRef<CUser_object> CWriteUtil::GetUserObjectByType(
818     const list<CRef<CUser_object > >& uos,
819     const string& strType)
820     //  ----------------------------------------------------------------------------
821 {
822     CConstRef<CUser_object> pResult;
823     typedef list<CRef<CUser_object > >::const_iterator CIT;
824     for (CIT cit = uos.begin(); cit != uos.end(); ++cit) {
825         const CUser_object& uo = **cit;
826         pResult = CWriteUtil::GetUserObjectByType(uo, strType);
827         if (pResult) {
828             return pResult;
829         }
830     }
831     return CConstRef<CUser_object>();
832 }
833 
834 //  ----------------------------------------------------------------------------
GetModelEvidence(CMappedFeat mf)835 CConstRef<CUser_object> CWriteUtil::GetModelEvidence(
836     CMappedFeat mf)
837 //  ----------------------------------------------------------------------------
838 {
839     CConstRef<CUser_object> me;
840     if (mf.IsSetExt()) {
841         me = CWriteUtil::GetUserObjectByType(mf.GetExt(), "ModelEvidence");
842     }
843     if (!me  &&  mf.IsSetExts()) {
844         me = CWriteUtil::GetUserObjectByType(mf.GetExts(), "ModelEvidence");
845     }
846     return me;
847 }
848 
849 //  -----------------------------------------------------------------------------
850 size_t
s_CountAccessions(const CUser_field & field)851 s_CountAccessions(
852     const CUser_field& field)
853 //  -----------------------------------------------------------------------------
854 {
855     size_t count = 0;
856     if (!field.IsSetData() || !field.GetData().IsFields()) {
857         return 0;
858     }
859 
860     //
861     //  Each accession consists of yet another block of "Fields" one of which carries
862     //  a label named "accession":
863     //
864     ITERATE(CUser_field::TData::TFields, it, field.GetData().GetFields()) {
865         const CUser_field& uf = **it;
866         if (uf.CanGetData() && uf.GetData().IsFields()) {
867 
868             ITERATE(CUser_field::TData::TFields, it2, uf.GetData().GetFields()) {
869                 const CUser_field& inner = **it2;
870                 if (inner.IsSetLabel() && inner.GetLabel().IsStr()) {
871                     if (inner.GetLabel().GetStr() == "accession") {
872                         ++count;
873                     }
874                 }
875             }
876         }
877     }
878     return count;
879 }
880 
881 
882 //  ----------------------------------------------------------------------------
GetStringForModelEvidence(CMappedFeat mf,string & mestr)883 bool CWriteUtil::GetStringForModelEvidence(
884     CMappedFeat mf,
885     string& mestr)
886 //  ----------------------------------------------------------------------------
887 {
888     CConstRef<CUser_object> me = CWriteUtil::GetModelEvidence(mf);
889     if (!me) {
890         return false;
891     }
892 
893     size_t numRna(0), numEst(0), numProtein(0), numLongSra(0),
894         rnaseqBaseCoverage(0), rnaseqBiosamplesIntronsFull(0);
895     string method;
896     const CUser_object::TData& fields = me->GetData();
897     ITERATE(CUser_object::TData, it, fields) {
898         const CUser_field& field = **it;
899         if (!field.IsSetLabel()  ||  !field.GetLabel().IsStr()) {
900             continue;
901         }
902         if (!field.IsSetData()) {
903             continue;
904         }
905         const string& label = field.GetLabel().GetStr();
906         if (label == "Method") {
907             method = field.GetData().GetStr();
908             continue;
909         }
910         if (label == "Counts") {
911             ITERATE(CUser_field::TData::TFields, inner, field.GetData().GetFields()) {
912                 const CUser_field& field = **inner;
913                 if (!field.IsSetLabel() || !field.GetLabel().IsStr()) {
914                     continue;
915                 }
916                 if (!field.IsSetData()) {
917                     continue;
918                 }
919                 const string& label = field.GetLabel().GetStr();
920                 if (label == "mRNA") {
921                     numRna = field.GetData().GetInt();
922                     continue;
923                 }
924                 if (label == "EST") {
925                     numEst = field.GetData().GetInt();
926                     continue;
927                 }
928                 if (label == "Protein") {
929                     numProtein = field.GetData().GetInt();
930                     continue;
931                 }
932                 if (label == "long SRA read") {
933                     numLongSra = field.GetData().GetInt();
934                     continue;
935                 }
936             }
937         }
938         if (label == "mRNA") {
939             numRna = s_CountAccessions(field);
940             continue;
941         }
942         if (label == "EST") {
943             numEst = s_CountAccessions(field);
944             continue;
945         }
946         if (label == "Protein") {
947             numProtein = s_CountAccessions(field);
948             continue;
949         }
950         if (label == "long SRA read") {
951             numLongSra = s_CountAccessions(field);
952             continue;
953         }
954         if (label == "rnaseq_base_coverage") {
955             if (field.CanGetData()  &&  field.GetData().IsInt()) {
956                 rnaseqBaseCoverage = field.GetData().GetInt();
957             }
958             continue;
959         }
960         if (label == "rnaseq_biosamples_introns_full") {
961             if (field.CanGetData() && field.GetData().IsInt()) {
962                 rnaseqBiosamplesIntronsFull = field.GetData().GetInt();
963             }
964             continue;
965         }
966     }
967 
968     //CSeqFeatData::ESubtype st = mf.GetFeatSubtype();
969     CNcbiOstrstream text;
970     //text << "Derived by automated computational analysis";
971     //if (!NStr::IsBlank(method)) {
972     //    text << " using gene prediction method: " << method;
973     //}
974     //text << ".";
975 
976     if (numRna > 0 || numEst > 0 || numProtein > 0 || numLongSra > 0 ||
977         rnaseqBaseCoverage > 0)
978     {
979         text << "Supporting evidence includes similarity to:";
980     }
981     string section_prefix = " ";
982     // The countable section
983     if (numRna > 0 || numEst > 0 || numProtein > 0 || numLongSra > 0)
984     {
985         text << section_prefix;
986         string prefix;
987         if (numRna > 0) {
988             text << prefix << numRna << " mRNA";
989             if (numRna > 1) {
990                 text << 's';
991             }
992             prefix = ", ";
993         }
994         if (numEst > 0) {
995             text << prefix << numEst << " EST";
996             if (numEst > 1) {
997                 text << 's';
998             }
999             prefix = ", ";
1000         }
1001         if (numProtein > 0) {
1002             text << prefix << numProtein << " Protein";
1003             if (numProtein > 1) {
1004                 text << 's';
1005             }
1006             prefix = ", ";
1007         }
1008         if (numLongSra > 0) {
1009             text << prefix << numLongSra << " long SRA read";
1010             if (numLongSra > 1) {
1011                 text << 's';
1012             }
1013             prefix = ", ";
1014         }
1015         section_prefix = ", and ";
1016     }
1017     // The RNASeq section
1018     if (rnaseqBaseCoverage > 0)
1019     {
1020         text << section_prefix;
1021 
1022         text << rnaseqBaseCoverage << "% coverage of the annotated genomic feature by RNAseq alignments";
1023         if (rnaseqBiosamplesIntronsFull > 0) {
1024             text << ", including " << rnaseqBiosamplesIntronsFull;
1025             text << " sample";
1026             if (rnaseqBiosamplesIntronsFull > 1) {
1027                 text << 's';
1028             }
1029             text << " with support for all annotated introns";
1030         }
1031 
1032         section_prefix = ", and ";
1033     }
1034     mestr = CNcbiOstrstreamToString(text);
1035     return true;
1036 }
1037 
1038 
1039 //  ----------------------------------------------------------------------------
GetThreeFeatType(const CSeq_feat & feat,string & threeFeatType)1040 bool CWriteUtil::GetThreeFeatType(
1041     const CSeq_feat& feat,
1042     string& threeFeatType)
1043 //  ----------------------------------------------------------------------------
1044 {
1045     if (!feat.IsSetExts()) {
1046         return false;
1047     }
1048     auto pUo = CWriteUtil::GetUserObjectByType(feat.GetExts(), "BED");
1049     if (!pUo  ||  !pUo->HasField("location")) {
1050         return false;
1051     }
1052     threeFeatType = pUo->GetField("location").GetString();
1053     return true;
1054 }
1055 
1056 
1057 //  ----------------------------------------------------------------------------
GetThreeFeatScore(const CSeq_feat & feat,int & score)1058 bool CWriteUtil::GetThreeFeatScore(
1059     const CSeq_feat& feat,
1060     int& score)
1061 //  ----------------------------------------------------------------------------
1062 {
1063     if (!feat.IsSetExts()) {
1064         return false;
1065     }
1066     auto pUo = CWriteUtil::GetUserObjectByType(feat.GetExts(), "DisplaySettings");
1067     if (!pUo  ||  !pUo->HasField("score")) {
1068         return false;
1069     }
1070     score = pUo->GetField("score").GetInt();
1071     return true;
1072 }
1073 
1074 
1075 //  ----------------------------------------------------------------------------
GetThreeFeatRgb(const CSeq_feat & feat,string & color)1076 bool CWriteUtil::GetThreeFeatRgb(
1077     const CSeq_feat& feat,
1078     string& color)
1079 //  ----------------------------------------------------------------------------
1080 {
1081     if (!feat.IsSetExts()) {
1082         return false;
1083     }
1084     auto pUo = CWriteUtil::GetUserObjectByType(feat.GetExts(), "DisplaySettings");
1085     if (!pUo  ||  !pUo->HasField("color")) {
1086         return false;
1087     }
1088     color = pUo->GetField("color").GetString();
1089     return true;
1090 }
1091 
1092 
1093 //  ----------------------------------------------------------------------------
IsThreeFeatFormat(const CSeq_annot & annot)1094 bool CWriteUtil::IsThreeFeatFormat(
1095     const CSeq_annot& annot)
1096 //  ----------------------------------------------------------------------------
1097 {
1098     using FTABLE = list<CRef<CSeq_feat> >;
1099 
1100     if (!annot.IsFtable()) {
1101         return false;
1102     }
1103     const FTABLE& ftable = annot.GetData().GetFtable();
1104     auto remainingTests = 100;
1105     for (auto pFeat: ftable) {
1106         string dummy;
1107         if (!CWriteUtil::GetThreeFeatType(*pFeat, dummy)) {
1108             return false;
1109         }
1110         if (--remainingTests == 0) {
1111             break;
1112         }
1113     }
1114     return true;
1115 }
1116 
1117 
1118 //  ----------------------------------------------------------------------------
GetStringForGoMarkup(const vector<CRef<CUser_field>> & fields,string & goMarkup,bool relaxed)1119 bool CWriteUtil::GetStringForGoMarkup(
1120     const vector<CRef<CUser_field > >& fields,
1121     string& goMarkup,
1122     bool relaxed)
1123 //  ----------------------------------------------------------------------------
1124 {
1125     vector<string> strings;
1126     if (! CWriteUtil::GetStringsForGoMarkup(fields, strings, relaxed)) {
1127         return false;
1128     }
1129     goMarkup = NStr::Join(strings, ",");
1130     return true;
1131 }
1132 
1133 //  ----------------------------------------------------------------------------
GetStringsForGoMarkup(const vector<CRef<CUser_field>> & fields,vector<string> & goMarkup,bool relaxed)1134 bool CWriteUtil::GetStringsForGoMarkup(
1135     const vector<CRef<CUser_field > >& fields,
1136     vector<string>& goMarkup,
1137     bool relaxed)
1138 //  ----------------------------------------------------------------------------
1139 {
1140     goMarkup.clear();
1141     for (const auto& field: fields) {
1142         if (!field->IsSetLabel()  ||  !field->GetLabel().IsId()
1143                 ||  ( field->GetLabel().GetId() != 0 && ! relaxed)) {
1144             continue;
1145         }
1146         if (!field->IsSetData()  ||  !field->GetData().IsFields()) {
1147             continue;
1148         }
1149         string descriptive, goId, pubmedId, evidence;
1150         const auto& subFields = field->GetData().GetFields();
1151         for (const auto& subField: subFields) {
1152             if (!subField->IsSetLabel()  ||  ! subField->GetLabel().IsStr()) {
1153                 continue;
1154             }
1155             const auto& subLabel = subField->GetLabel().GetStr();
1156             if (subLabel == "text string") {
1157                 descriptive = subField->GetData().GetStr();
1158                 continue;
1159             }
1160             if (subLabel == "go id") {
1161                 goId = subField->GetData().GetStr();
1162                 continue;
1163             }
1164             if (subLabel == "pubmed id") {
1165                 pubmedId = NStr::IntToString(subField->GetData().GetInt());
1166                 continue;
1167             }
1168             if (subLabel == "evidence") {
1169                 evidence = subField->GetData().GetStr();
1170                 continue;
1171             }
1172         }
1173         goMarkup.push_back(descriptive + "|" + goId + "|" + pubmedId + "|" + evidence);
1174     }
1175     return true;
1176 }
1177 
1178 //  ----------------------------------------------------------------------------
GetListOfGoIds(const vector<CRef<CUser_field>> & fields,list<std::string> & goIds,bool relaxed)1179 bool CWriteUtil::GetListOfGoIds(
1180     const vector<CRef<CUser_field > >& fields,
1181     list<std::string>& goIds,
1182     bool relaxed)
1183 //  ----------------------------------------------------------------------------
1184 {
1185     for (const auto& field: fields) {
1186         if (!field->IsSetLabel()  ||  !field->GetLabel().IsId()
1187                 ||  ( field->GetLabel().GetId() != 0 && ! relaxed)) {
1188             continue;
1189         }
1190         if (!field->IsSetData()  ||  !field->GetData().IsFields()) {
1191             continue;
1192         }
1193         string descriptive, goId, pubmedId, evidence;
1194         const auto& subFields = field->GetData().GetFields();
1195         for (const auto& subField: subFields) {
1196             if (!subField->IsSetLabel()  ||  ! subField->GetLabel().IsStr()) {
1197                 continue;
1198             }
1199             const auto& subLabel = subField->GetLabel().GetStr();
1200             if (subLabel == "go id") {
1201                 goId = subField->GetData().GetStr();
1202                 goIds.push_back(string("GO:")+goId);
1203                 continue;
1204             }
1205         }
1206     }
1207     return true;
1208 }
1209 
1210 //  ----------------------------------------------------------------------------
CompareLocations(const CMappedFeat & lhs,const CMappedFeat & rhs)1211 bool CWriteUtil::CompareLocations(
1212     const CMappedFeat& lhs,
1213     const CMappedFeat& rhs)
1214 //  ----------------------------------------------------------------------------
1215 {
1216     const CSeq_loc& lhl = lhs.GetLocation();
1217     const CSeq_loc& rhl = rhs.GetLocation();
1218 
1219     //test1: id, alphabetical
1220     string lhs_id = CWriteUtil::GetStringId(lhl);
1221     string rhs_id = CWriteUtil::GetStringId(rhl);
1222     if (lhs_id != rhs_id) {
1223         return (lhs_id < rhs_id);
1224     }
1225 
1226     //test2: loc-start ascending
1227     size_t lhs_start = lhl.GetStart(ESeqLocExtremes::eExtreme_Positional);
1228     size_t rhs_start = rhl.GetStart(ESeqLocExtremes::eExtreme_Positional);
1229     if (lhs_start != rhs_start) {
1230         return (lhs_start < rhs_start);
1231     }
1232     //test3: loc-stop decending
1233     size_t lhs_stop = lhl.GetStop(ESeqLocExtremes::eExtreme_Positional);
1234     size_t rhs_stop = rhl.GetStop(ESeqLocExtremes::eExtreme_Positional);
1235     return (lhs_stop > rhs_stop);
1236 }
1237 
1238 
1239 //  ----------------------------------------------------------------------------
GetStringId(const CSeq_loc & loc)1240 string CWriteUtil::GetStringId(
1241     const CSeq_loc& loc)
1242 //  ----------------------------------------------------------------------------
1243 {
1244     if (loc.GetId()) {
1245         return loc.GetId()->AsFastaString();
1246     }
1247     return "";
1248 }
1249 
1250 //  ----------------------------------------------------------------------------
IsNucleotideSequence(CBioseq_Handle bsh)1251 bool CWriteUtil::IsNucleotideSequence(CBioseq_Handle bsh)
1252 //  ----------------------------------------------------------------------------
1253 {
1254     if (bsh.CanGetInst_Mol()) {
1255         const auto& mol = bsh.GetBioseqMolType();
1256         switch (mol) {
1257         default:
1258             break;
1259         case CSeq_inst::eMol_dna:
1260         case CSeq_inst::eMol_na:
1261         case CSeq_inst::eMol_rna:
1262             return true;
1263         case CSeq_inst::eMol_aa:
1264             return false;
1265         }
1266     }
1267     if (bsh.CanGetDescr()) {
1268         const auto& descrs = bsh.GetDescr().Get();
1269         for (const auto& pDescr: descrs) {
1270             if (pDescr->IsMolinfo()  &&  pDescr->GetMolinfo().CanGetBiomol()) {
1271                 switch(pDescr->GetMolinfo().GetBiomol()) {
1272                 case CMolInfo::eBiomol_unknown:
1273                 case CMolInfo::eBiomol_other:
1274                     break;
1275                 case CMolInfo::eBiomol_peptide:
1276                     return false;
1277                 default:
1278                     return true;
1279                 }
1280             }
1281         }
1282     }
1283     return false;
1284 }
1285 
1286 
1287 //  ----------------------------------------------------------------------------
IsProteinSequence(CBioseq_Handle bsh)1288 bool CWriteUtil::IsProteinSequence(CBioseq_Handle bsh)
1289 //  ----------------------------------------------------------------------------
1290 {
1291     if (bsh.CanGetInst_Mol()) {
1292         const auto& mol = bsh.GetBioseqMolType();
1293         switch (mol) {
1294         default:
1295             break;
1296         case CSeq_inst::eMol_dna:
1297         case CSeq_inst::eMol_na:
1298         case CSeq_inst::eMol_rna:
1299             return false;
1300         case CSeq_inst::eMol_aa:
1301             return true;
1302         }
1303     }
1304     if (bsh.CanGetDescr()) {
1305         const auto& descrs = bsh.GetDescr().Get();
1306         for (const auto& pDescr: descrs) {
1307             if (pDescr->IsMolinfo()  &&  pDescr->GetMolinfo().CanGetBiomol()) {
1308                 switch(pDescr->GetMolinfo().GetBiomol()) {
1309                 case CMolInfo::eBiomol_unknown:
1310                 case CMolInfo::eBiomol_other:
1311                     break;
1312                 case CMolInfo::eBiomol_peptide:
1313                     return true;
1314                 default:
1315                     return false;
1316                 }
1317             }
1318         }
1319     }
1320     return false;
1321 }
1322 
1323 //  ----------------------------------------------------------------------------
IsTransspliced(const CSeq_feat & feature)1324 bool CWriteUtil::IsTransspliced(const CSeq_feat& feature)
1325 //  ----------------------------------------------------------------------------
1326 {
1327     return (feature.IsSetExcept_text() && feature.GetExcept_text() == "trans-splicing");
1328 }
1329 
1330 
1331 //  ----------------------------------------------------------------------------
IsTransspliced(const CMappedFeat & mf)1332 bool CWriteUtil::IsTransspliced(const CMappedFeat& mf)
1333 //  ----------------------------------------------------------------------------
1334 {
1335     return CWriteUtil::IsTransspliced(mf.GetMappedFeature());
1336     //return (mf.IsSetExcept_text()  &&  mf.GetExcept_text() == "trans-splicing");
1337 }
1338 
1339 
1340 //  ----------------------------------------------------------------------------
GetTranssplicedEndpoints(const CSeq_loc & loc,unsigned int & inPoint,unsigned int & outPoint)1341 bool CWriteUtil::GetTranssplicedEndpoints(
1342 //  ----------------------------------------------------------------------------
1343     const CSeq_loc& loc,
1344     unsigned int& inPoint,
1345     unsigned int& outPoint)
1346 //  start determined by the minimum start of any sub interval
1347 //  stop determined by the maximum stop of any sub interval
1348 //  ----------------------------------------------------------------------------
1349 {
1350     typedef list<CRef<CSeq_interval> >::const_iterator CIT;
1351 
1352     CSeq_loc testLoc;
1353     testLoc.Assign(loc);
1354     if (testLoc.IsMix()) {
1355         testLoc.ChangeToPackedInt();
1356     }
1357     if (!testLoc.IsPacked_int()) {
1358         return false;
1359     }
1360     const CPacked_seqint& packedInt = testLoc.GetPacked_int();
1361     inPoint = packedInt.GetStart(eExtreme_Biological);
1362     outPoint = packedInt.GetStop(eExtreme_Biological);
1363     const list<CRef<CSeq_interval> >& intvs = packedInt.Get();
1364     for (CIT cit = intvs.begin(); cit != intvs.end(); cit++) {
1365         const CSeq_interval& intv = **cit;
1366         if (intv.GetFrom() < inPoint) {
1367             inPoint = intv.GetFrom();
1368         }
1369         if (intv.GetTo() > outPoint) {
1370             outPoint = intv.GetTo();
1371         }
1372     }
1373     return true;
1374 }
1375 
1376 //  ----------------------------------------------------------------------------
GetEffectiveStrand(const CSeq_interval & interval)1377 ENa_strand CWriteUtil::GetEffectiveStrand(
1378     const CSeq_interval& interval)
1379 //  ----------------------------------------------------------------------------
1380 {
1381     // if it's not explicitely minus, then it's plus
1382     //  (not true for other location types)
1383     return (interval.IsSetStrand() && interval.GetStrand() == eNa_strand_minus)
1384         ?
1385         eNa_strand_minus :
1386         eNa_strand_plus;
1387 }
1388 
1389 
1390 
1391 END_NCBI_SCOPE
1392