1 /*  $Id: feature_item.cpp 637281 2021-09-09 19:27:07Z ivanov $
2 * ===========================================================================
3 *
4 *                            PUBLIC DOMAIN NOTICE
5 *               National Center for Biotechnology Information
6 *
7 *  This software/database is a "United States Government Work" under the
8 *  terms of the United States Copyright Act.  It was written as part of
9 *  the author's official duties as a United States Government employee and
10 *  thus cannot be copyrighted.  This software/database is freely available
11 *  to the public for use. The National Library of Medicine and the U.S.
12 *  Government have not placed any restriction on its use or reproduction.
13 *
14 *  Although all reasonable efforts have been taken to ensure the accuracy
15 *  and reliability of the software and data, the NLM and the U.S.
16 *  Government do not and cannot warrant the performance or results that
17 *  may be obtained by using this software or data. The NLM and the U.S.
18 *  Government disclaim all warranties, express or implied, including
19 *  warranties of performance, merchantability or fitness for any particular
20 *  purpose.
21 *
22 *  Please cite the author in any work or product based on this material.
23 *
24 * ===========================================================================
25 *
26 * Author:  Aaron Ucko, NCBI
27 *          Mati Shomrat
28 * Maintainer: Frank Ludwig
29 *
30 * File Description:
31 *   new (early 2003) flat-file generator -- representation of features
32 *   (mainly of interest to implementors)
33 *
34 *
35 * WHEN EDITING THE LIST OF QUALIFIERS:
36 *
37 * - there is currently a lot of parallel logic for the FTable case
38 *   (CFeatureItem::x_AddFTableQuals()) and the standard case
39 *   (CFeatureItem::x_Add...Quals()). Make sure to edit both cases as
40 *   appropriate.
41 * ===========================================================================
42 */
43 #include <ncbi_pch.hpp>
44 #include <corelib/ncbistd.hpp>
45 #include <serial/iterator.hpp>
46 #include <serial/enumvalues.hpp>
47 
48 #include <algorithm>
49 #include <sstream>
50 
51 #include <objects/seq/Bioseq.hpp>
52 #include <objects/seq/Heterogen.hpp>
53 #include <objects/seq/MolInfo.hpp>
54 #include <objects/seq/seq_id_handle.hpp>
55 #include <objects/seq/Annot_descr.hpp>
56 #include <objects/seq/Annotdesc.hpp>
57 #include <objects/seq/Seq_literal.hpp>
58 #include <objects/seq/seqport_util.hpp>
59 #include <objects/seqfeat/Org_ref.hpp>
60 #include <objects/seqfeat/OrgName.hpp>
61 #include <objects/seqfeat/OrgMod.hpp>
62 #include <objects/seqfeat/PCRPrimerSet.hpp>
63 #include <objects/seqfeat/PCRPrimer.hpp>
64 #include <objects/seqfeat/PCRReaction.hpp>
65 #include <objects/seqfeat/PCRReactionSet.hpp>
66 #include <objects/seqfeat/Code_break.hpp>
67 #include <objects/seqfeat/Delta_item.hpp>
68 #include <objects/seqfeat/Gb_qual.hpp>
69 #include <objects/seqfeat/Gene_nomenclature.hpp>
70 #include <objects/seqfeat/Genetic_code.hpp>
71 #include <objects/seqfeat/Genetic_code_table.hpp>
72 #include <objects/seqfeat/Imp_feat.hpp>
73 #include <objects/seqfeat/RNA_ref.hpp>
74 #include <objects/seqfeat/RNA_gen.hpp>
75 #include <objects/seqfeat/RNA_qual_set.hpp>
76 #include <objects/seqfeat/RNA_qual.hpp>
77 #include <objects/seqfeat/Trna_ext.hpp>
78 #include <objects/seqfeat/Feat_id.hpp>
79 #include <objects/seqfeat/SeqFeatXref.hpp>
80 #include <objects/seqfeat/Variation_ref.hpp>
81 #include <objects/seqfeat/Variation_inst.hpp>
82 #include <objects/seqloc/Seq_loc.hpp>
83 #include <objects/seqloc/Seq_point.hpp>
84 #include <objects/seqloc/Seq_interval.hpp>
85 #include <objects/seqloc/Packed_seqpnt.hpp>
86 #include <objects/seqloc/Textseq_id.hpp>
87 #include <objects/general/Object_id.hpp>
88 #include <objects/misc/sequence_macros.hpp>
89 
90 #include <objmgr/scope.hpp>
91 #include <objmgr/object_manager.hpp>
92 #include <objmgr/seqdesc_ci.hpp>
93 #include <objmgr/seq_vector.hpp>
94 #include <objmgr/util/sequence.hpp>
95 #include <objmgr/util/feature.hpp>
96 #include <objmgr/util/weight.hpp>
97 #include <objmgr/util/seq_loc_util.hpp>
98 
99 #include <util/static_set.hpp>
100 #include <util/static_map.hpp>
101 #include <util/sequtil/sequtil.hpp>
102 #include <util/sequtil/sequtil_convert.hpp>
103 
104 #include <algorithm>
105 #include <objtools/data_loaders/genbank/gbloader.hpp>
106 #include <objtools/format/formatter.hpp>
107 #include <objtools/format/items/feature_item.hpp>
108 #include <objtools/format/items/gene_finder.hpp>
109 #include <objtools/format/context.hpp>
110 #include <objtools/format/items/qualifiers.hpp>
111 #include <objmgr/util/objutil.hpp>
112 #include "inst_info_map.hpp"
113 
114 // On Mac OS X 10.3, FixMath.h defines ff as a one-argument macro(!)
115 #ifdef ff
116 #  undef ff
117 #endif
118 
119 BEGIN_NCBI_SCOPE
120 BEGIN_SCOPE(objects)
121 USING_SCOPE(sequence);
122 
123 class CGoQualLessThan
124 {
125 public:
operator ()(const CConstRef<CFlatGoQVal> & obj1,const CConstRef<CFlatGoQVal> & obj2)126     bool operator() ( const CConstRef<CFlatGoQVal> &obj1, const CConstRef<CFlatGoQVal> &obj2 )
127     {
128         const CFlatGoQVal *qval1 = obj1.GetNonNullPointer();
129         const CFlatGoQVal *qval2 = obj2.GetNonNullPointer();
130 
131         // sort by text string
132         const string &str1 = qval1->GetTextString();
133         const string &str2 = qval2->GetTextString();
134 
135         int textComparison = 0;
136 
137         // This whole paragraph should eventually be replaced with a mere NStr::CompareNocase stored into textComparison
138         // We can't just use NStr::CompareNocase, because that compares using tolower, whereas
139         // we must compare with toupper to maintain compatibility with C.
140         SIZE_TYPE pos = 0;
141         const SIZE_TYPE min_length = min( str1.length(), str2.length() );
142         for( ; pos < min_length; ++pos ) {
143             textComparison = toupper( str1[pos] ) - toupper( str2[pos] );
144             if( textComparison != 0 ) {
145                 break;
146             }
147         }
148         if( 0 == textComparison ) {
149             // if we reached the end, compare via length (shorter first)
150             textComparison = str1.length() - str2.length();
151         }
152 
153         // compare by text, if possible
154         if( textComparison < 0 ) {
155             return true;
156         } else if( textComparison > 0 ) {
157             return false;
158         }
159 
160         // if text is tied, then sort by pubmed id, if any
161         int pmid1 = qval1->GetPubmedId();
162         int pmid2 = qval2->GetPubmedId();
163 
164         if( 0 == pmid1 ) {
165             return false;
166         } else if( 0 == pmid2 ) {
167             return true;
168         } else {
169             return pmid1 < pmid2;
170         }
171     }
172 };
173 
174 // -- static functions
175 
s_ValidId(const CSeq_id & id)176 static bool s_ValidId(const CSeq_id& id)
177 {
178     return id.IsGenbank()  ||  id.IsEmbl()    ||  id.IsDdbj()  ||
179            id.IsOther()    ||  id.IsPatent()  ||
180            id.IsTpg()      ||  id.IsTpe()     ||  id.IsTpd()   ||
181            id.IsGpipe();
182 }
183 
184 static
s_StrEqualDisregardFinalPeriod(const string & s1,const string & s2,NStr::ECase use_case)185 bool s_StrEqualDisregardFinalPeriod(
186     const string &s1, const string &s2,
187     NStr::ECase use_case )
188 {
189     if( s1.empty() || s2.empty() ) {
190         return s1.empty() && s2.empty();
191     }
192 
193     // set length to disregard final period, if any
194     size_t s1_len = s1.length();
195     if( s1[s1_len-1] == '.' ) {
196         --s1_len;
197     }
198     size_t s2_len = s2.length();
199     if( s2[s2_len-1] == '.' ) {
200         --s2_len;
201     }
202 
203     if( s1_len != s2_len ) {
204         return false;
205     }
206 
207     // NStr::Equal does not have exactly the function I want,
208     // so I have to make my own.
209     for( size_t ii = 0; ii < s1_len ; ++ii ) {
210         const char ch1 = ( use_case == NStr::eNocase ? toupper(s1[ii]) : s1[ii] );
211         const char ch2 = ( use_case == NStr::eNocase ? toupper(s2[ii]) : s2[ii] );
212         if( ch1 != ch2 ) {
213             return false;
214         }
215     }
216     return true;
217 }
218 
/// Check whether a coding-region feature satisfies the product-id
/// requirement.
///
/// @param feat  the CDS feature being checked
/// @param loc   location used to look up an overlapping gene when the
///              feature has no gene xref
/// @param ctx   bioseq context supplying the configuration and scope
/// @return true if the check is disabled in the configuration, if the
///         feature is treated as pseudo or as "just a stop" (see below), if
///         its product resolves to an id with a valid accession, or if it
///         has no product but its exception text mentions
///         "rearrangement required for product"; false otherwise.
static bool s_CheckQuals_cdregion(const CMappedFeat& feat,
                                  const CSeq_loc& loc,
                                  CBioseqContext& ctx)
{
    // Nothing to verify when the configuration does not ask for this check.
    if ( !ctx.Config().CheckCDSProductId() ) {
        return true;
    }

    CScope& scope = ctx.GetScope();

    // non-pseudo CDS must have /product
    bool pseudo = feat.IsSetPseudo()  &&  feat.GetPseudo() ;
    if ( !pseudo ) {
        // Not pseudo on its own: consult the gene xref, or failing that the
        // overlapping gene feature, for a pseudo flag.
        const CGene_ref* grp = feat.GetGeneXref();
        if ( grp == NULL ) {
            CConstRef<CSeq_feat> gene = GetOverlappingGene(loc, scope);
            if (gene) {
                pseudo = gene->IsSetPseudo()  &&  gene->GetPseudo();
                if ( !pseudo ) {
                    grp = &(gene->GetData().GetGene());
                }
            }
        }
        if ( !pseudo  &&  grp != NULL ) {
            pseudo = grp->GetPseudo();
        }
    }

    // "just_stop": the feature's own location is partial at its biological
    // start, not partial at its stop, and at most 5 long.
    bool just_stop = false;
    const CSeq_loc& Loc = feat.GetLocation();
    if ( Loc.IsPartialStart(eExtreme_Biological)  &&  !Loc.IsPartialStop(eExtreme_Biological) ) {
        if ( GetLength(Loc, &scope) <= 5 ) {
            just_stop = true;
        }
    }

    if ( pseudo ||  just_stop ) {
        return true;
    }

    // make sure the product has a valid accession
    if (feat.IsSetProduct()) {
        CConstRef<CSeq_id> id;
        try {
            id.Reset(&(GetId(feat.GetProduct(), &scope)));
        } catch ( CException& ) {
            // failure to obtain an id is treated the same as having none
            id.Reset(NULL);
        }
        if (id) {
            if ((id->IsGi()  &&  id->GetGi() > ZERO_GI) ||  id->IsLocal()) {
                // GI or local id: look the product up within the same TSE
                // and scan all of its ids for an acceptable accession.
                CBioseq_Handle prod = scope.GetBioseqHandleFromTSE(*id, ctx.GetHandle());
                if (prod) {
                    ITERATE (CBioseq_Handle::TId, it, prod.GetId()) {
                        if (s_ValidId(*it->GetSeqId())) {
                            CConstRef<CTextseq_id> tsip(it->GetSeqId()->GetTextseq_Id());
                            if (tsip  &&  tsip->IsSetAccession()  &&
                                IsValidAccession(tsip->GetAccession())) {
                                return true;
                            }
                        }
                    }
                } else if (id->IsGi()  &&  id->GetGi() > ZERO_GI) {
                    // RELEASE_MODE requires that /protein_id is an accession
                    if (ctx.Config().IsModeRelease()) {
                        try {
                            if (IsValidAccession(GetAccessionForGi(id->GetGi(), scope))) {
                                return true;
                            }
                        } catch (CException&) {
                            // lookup failure: fall through to "not valid"
                        }
                    }
                }
            } else if (s_ValidId(*id)) {
                // Already a text-style id of an accepted type: check its
                // accession directly.
                CConstRef<CTextseq_id> tsip(id->GetTextseq_Id());
                if (tsip  &&  tsip->IsSetAccession()  &&
                    IsValidAccession(tsip->GetAccession())) {
                    return true;
                }
            }
        }
    } else {  // no product
        // Without a product the feature passes only if it is flagged as an
        // exception whose text contains the phrase below.
        if (feat.IsSetExcept()  &&  feat.GetExcept()  &&
            feat.IsSetExcept_text() ) {
            if (NStr::Find(feat.GetExcept_text(),
                    "rearrangement required for product") != NPOS) {
                return true;
            }
        }
    }

    return false;
}
311 
312 
313 
s_HasPub(const CMappedFeat & feat,CBioseqContext & ctx)314 static bool s_HasPub(const CMappedFeat& feat, CBioseqContext& ctx)
315 {
316     ITERATE(CBioseqContext::TReferences, it, ctx.GetReferences()) {
317         if ((*it)->Matches(feat.GetCit())) {
318             return true;
319         }
320     }
321 
322     return false;
323 }
324 
325 
s_HasCompareOrCitation(const CMappedFeat & feat,CBioseqContext & ctx)326 static bool s_HasCompareOrCitation(const CMappedFeat& feat, CBioseqContext& ctx)
327 {
328     // check for /compare
329     if (!NStr::IsBlank(feat.GetNamedQual("compare"))) {
330         return true;
331     }
332 
333     // check for /citation
334     if (feat.IsSetCit()) {
335         return s_HasPub(feat, ctx);
336     }
337 
338     return false;
339 }
340 
341 
342 // conflict requires /citation or /compare
s_CheckQuals_conflict(const CMappedFeat & feat,CBioseqContext & ctx)343 static bool s_CheckQuals_conflict(const CMappedFeat& feat, CBioseqContext& ctx)
344 {
345     // RefSeq allows conflict with accession in comment instead of sfp->cit
346     if (ctx.IsRefSeq()  &&
347         feat.IsSetComment()  &&  !NStr::IsBlank(feat.GetComment())) {
348         return true;
349     }
350 
351     return s_HasCompareOrCitation(feat, ctx);
352 }
353 
354 // old_sequence requires /citation or /compare
s_CheckQuals_old_seq(const CMappedFeat & feat,CBioseqContext & ctx)355 static bool s_CheckQuals_old_seq(const CMappedFeat& feat, CBioseqContext& ctx)
356 {
357     return s_HasCompareOrCitation(feat, ctx);
358 }
359 
360 
s_CheckQuals_gene(const CMappedFeat & feat)361 static bool s_CheckQuals_gene(const CMappedFeat& feat)
362 {
363     // gene requires /gene or /locus_tag, but desc or syn can be mapped to /gene
364     const CSeqFeatData::TGene& gene = feat.GetData().GetGene();
365     if ( (gene.IsSetLocus()      &&  !gene.GetLocus().empty())      ||
366          (gene.IsSetLocus_tag()  &&  !gene.GetLocus_tag().empty())  ||
367          (gene.IsSetDesc()       &&  !gene.GetDesc().empty())       ||
368          (!gene.GetSyn().empty()  &&  !gene.GetSyn().front().empty()) ) {
369         return true;
370     }
371 
372     return false;
373 }
374 
375 
s_CheckQuals_bind(const CMappedFeat & feat)376 static bool s_CheckQuals_bind(const CMappedFeat& feat)
377 {
378     // protein_bind or misc_binding require eFQ_bound_moiety
379     return !NStr::IsBlank(feat.GetNamedQual("bound_moiety"));
380 }
381 
382 
s_CheckQuals_mod_base(const CMappedFeat & feat)383 static bool s_CheckQuals_mod_base(const CMappedFeat& feat)
384 {
385     // modified_base requires eFQ_mod_base
386     return !NStr::IsBlank(feat.GetNamedQual("mod_base"));
387 }
388 
389 
s_CheckQuals_gap(const CMappedFeat & feat)390 static bool s_CheckQuals_gap(const CMappedFeat& feat)
391 {
392     // gap feature must have /estimated_length qual
393     return !feat.GetNamedQual("estimated_length").empty();
394 }
395 
s_CheckQuals_assembly_gap(const CMappedFeat & feat)396 static bool s_CheckQuals_assembly_gap(const CMappedFeat& feat)
397 {
398     // assembly_gap feature must have /estimated_length qual
399     // and /gap_type
400     return ! feat.GetNamedQual("estimated_length").empty() &&
401         ! feat.GetNamedQual("gap_type").empty();
402 }
403 
404 
s_CheckQuals_ncRNA(const CMappedFeat & feat)405 static bool s_CheckQuals_ncRNA(const CMappedFeat& feat)
406 {
407     if( !NStr::IsBlank(feat.GetNamedQual("ncRNA_class")) ) {
408         return true;
409     }
410 
411     // Look at this mess; if only we could use sequence_macros.hpp
412     if( feat.GetData().GetRna().IsSetExt() &&
413         feat.GetData().GetRna().GetExt().IsGen() &&
414         feat.GetData().GetRna().GetExt().GetGen().IsSetClass() &&
415         !NStr::IsBlank(feat.GetData().GetRna().GetExt().GetGen().GetClass()) )
416     {
417         return true;
418     }
419 
420     return false;
421 }
422 
423 
s_CheckQuals_regulatory(const CMappedFeat & feat)424 static bool s_CheckQuals_regulatory(const CMappedFeat& feat)
425 {
426     // regulatory feature must have /regulatory_class qual
427     return ! feat.GetNamedQual("regulatory_class").empty();
428 }
429 
430 
s_CheckMandatoryQuals(const CMappedFeat & feat,const CSeq_loc & loc,CBioseqContext & ctx)431 static bool s_CheckMandatoryQuals(const CMappedFeat& feat,
432                                   const CSeq_loc& loc,
433                                   CBioseqContext& ctx)
434 {
435     switch ( feat.GetData().GetSubtype() ) {
436     case CSeqFeatData::eSubtype_cdregion:
437         {
438             return s_CheckQuals_cdregion(feat, loc, ctx);
439         }
440     case CSeqFeatData::eSubtype_conflict:
441         {
442             return s_CheckQuals_conflict(feat, ctx);
443         }
444     case CSeqFeatData::eSubtype_old_sequence:
445         {
446             return s_CheckQuals_old_seq(feat, ctx);
447         }
448     case CSeqFeatData::eSubtype_gene:
449         {
450             return s_CheckQuals_gene(feat);
451         }
452     case CSeqFeatData::eSubtype_protein_bind:
453     case CSeqFeatData::eSubtype_misc_binding:
454         {
455             return s_CheckQuals_bind(feat);
456         }
457     case CSeqFeatData::eSubtype_modified_base:
458         {
459             return s_CheckQuals_mod_base(feat);
460         }
461     case CSeqFeatData::eSubtype_gap:
462         {
463             return s_CheckQuals_gap(feat);
464         }
465     case CSeqFeatData::eSubtype_assembly_gap:
466         {
467             return s_CheckQuals_assembly_gap(feat);
468         }
469     case CSeqFeatData::eSubtype_ncRNA:
470         {
471             return s_CheckQuals_ncRNA(feat);
472         }
473     case CSeqFeatData::eSubtype_regulatory:
474         {
475             return s_CheckQuals_regulatory(feat);
476         }
477     default:
478         break;
479     }
480 
481     return true;
482 }
483 
s_SkipFeature(const CMappedFeat & feat,const CSeq_loc & loc,CBioseqContext & ctx)484 static bool s_SkipFeature(const CMappedFeat& feat,
485                           const CSeq_loc& loc,
486                           CBioseqContext& ctx)
487 {
488     CSeqFeatData::E_Choice type    = feat.GetData().Which();
489     CSeqFeatData::ESubtype subtype = feat.GetData().GetSubtype();
490 
491     if ( subtype == CSeqFeatData::eSubtype_pub              ||
492       /* subtype == CSeqFeatData::eSubtype_non_std_residue  || */
493          subtype == CSeqFeatData::eSubtype_biosrc           ||
494          subtype == CSeqFeatData::eSubtype_rsite            ||
495          subtype == CSeqFeatData::eSubtype_seq ) {
496         return true;
497     }
498 
499     const CFlatFileConfig& cfg = ctx.Config();
500 
501     // check feature customization flags
502     if ( cfg.ValidateFeatures()  &&
503         (subtype == CSeqFeatData::eSubtype_bad  ||
504          subtype == CSeqFeatData::eSubtype_virion) ) {
505         return true;
506     }
507 
508     if ( cfg.ValidateFeatures() && type == CSeqFeatData::e_Imp ) {
509         switch ( subtype ) {
510         default:
511             break;
512         case CSeqFeatData::eSubtype_imp:
513         case CSeqFeatData::eSubtype_site_ref:
514         case CSeqFeatData::eSubtype_gene:
515         case CSeqFeatData::eSubtype_mutation:
516         case CSeqFeatData::eSubtype_allele:
517             return true;
518         }
519     }
520 
521     if ( ctx.IsNuc()  &&  subtype == CSeqFeatData::eSubtype_het ) {
522         return true;
523     }
524 
525     if ( cfg.HideImpFeatures()  &&  type == CSeqFeatData::e_Imp ) {
526         return true;
527     }
528 
529     if ( cfg.HideMiscFeatures() ) {
530         if ( type == CSeqFeatData::e_Site ||
531             type == CSeqFeatData::e_Bond ||
532             type == CSeqFeatData::e_Region ||
533             type == CSeqFeatData::e_Comment ||
534             subtype == CSeqFeatData::eSubtype_misc_feature ||
535             subtype == CSeqFeatData::eSubtype_preprotein ) {
536             return true;
537         }
538     }
539 
540     if ( cfg.HideExonFeatures()  &&  subtype == CSeqFeatData::eSubtype_exon ) {
541         return true;
542     }
543 
544     if ( cfg.HideIntronFeatures()  &&  subtype == CSeqFeatData::eSubtype_intron ) {
545         return true;
546     }
547 
548     if ( cfg.HideRemoteImpFeatures()  &&  type == CSeqFeatData::e_Imp ) {
549         if ( subtype == CSeqFeatData::eSubtype_variation  ||
550              subtype == CSeqFeatData::eSubtype_exon       ||
551              subtype == CSeqFeatData::eSubtype_intron     ||
552              subtype == CSeqFeatData::eSubtype_misc_feature ) {
553             return true;
554         }
555     }
556 
557     if ( cfg.GeneRNACDSFeatures() ) {
558         if ( type != CSeqFeatData::e_Gene &&
559             type != CSeqFeatData::e_Rna &&
560             type != CSeqFeatData::e_Cdregion ) {
561             return true;
562         }
563     }
564 
565     // skip genes in DDBJ format
566     if ( cfg.IsFormatDDBJ()  &&  type == CSeqFeatData::e_Gene ) {
567         return true;
568     }
569 
570     // if RELEASE mode, make sure we have all info to create mandatory quals.
571     if ( cfg.NeedRequiredQuals() ) {
572         return !s_CheckMandatoryQuals(feat, loc, ctx);
573     }
574 
575     return false;
576 }
577 
/// Unary predicate that flags characters which may not appear in an EC
/// number: anything that is not a decimal digit, '.' or '-'.
class BadECNumberChar {
public:
    /// @param ch  character to classify
    /// @return true if ch is neither a digit, a period, nor a dash.
    ///
    /// Declared const so the predicate can be used through a const object.
    bool operator()( const char ch ) const
    {
        // isdigit() requires a value representable as unsigned char;
        // passing a negative plain char is undefined behavior.
        const bool is_digit = ( isdigit( static_cast<unsigned char>(ch) ) != 0 );
        return( ! is_digit && ch != '.' && ch != '-' );
    }
};
585 
/// Check whether a string has the shape of a legal EC number.
///
/// Patterns originally documented as acceptable (the code below is the
/// authority; this list may be out of date):
///   num.num.num.num
///   num.num.num.-
///   num.num.-.-
///   num.-.-.-
///   -.-.-.-
/// ("n" may be used instead of "-".)
///
/// What the code enforces:
///  - the string is non-empty and contains exactly three periods;
///  - every field holds either digits or exactly one ambiguity marker
///    ('-' or 'n'), never both and never nothing;
///  - once an ambiguity marker has been seen, no digit may follow;
///  - an 'n' that opens the fourth field and is immediately followed by a
///    digit is ignored rather than treated as an ambiguity marker;
///  - characters other than digits, '-', 'n' and '.' are ignored here.
///
/// @param ec_number  candidate EC number
/// @return true if the string passes the checks above.
static bool s_IsLegalECNumber(const string& ec_number)
{
    if ( ec_number.empty() ) {
        return false;
    }

    bool is_ambig   = false;   // an ambiguity marker has been seen
    int  numperiods = 0;       // periods seen so far
    int  numdigits  = 0;       // digits in the current field
    int  numdashes  = 0;       // ambiguity markers in the current field

    // Explicit iterator loop: the 'n' case needs to peek at the next char.
    for ( string::const_iterator ec_iter = ec_number.begin();
          ec_iter != ec_number.end();  ++ec_iter ) {
        const char ch = *ec_iter;

        // isdigit() requires a value representable as unsigned char;
        // passing a negative plain char is undefined behavior.
        if ( isdigit( static_cast<unsigned char>(ch) ) ) {
            numdigits++;
            if ( is_ambig ) {
                return false;
            }
        } else if ( ch == '-' ) {
            numdashes++;
            is_ambig = true;
        } else if ( ch == 'n' ) {
            string::const_iterator ec_iter_next = ec_iter;
            ++ec_iter_next;
            const bool followed_by_digit =
                ec_iter_next != ec_number.end()  &&
                isdigit( static_cast<unsigned char>(*ec_iter_next) );
            if ( followed_by_digit  &&  numperiods == 3  &&  numdigits == 0 ) {
                // allow/ignore n in first position of fourth number to not
                // mean ambiguous, if followed by digit
            } else {
                numdashes++;
                is_ambig = true;
            }
        } else if ( ch == '.' ) {
            numperiods++;
            // validate the field that just ended
            if ( numdigits > 0  &&  numdashes > 0 )   return false;
            if ( numdigits == 0  &&  numdashes == 0 ) return false;
            if ( numdashes > 1 )                      return false;
            numdigits = 0;
            numdashes = 0;
        }
    }

    // validate the final (fourth) field
    if ( numperiods == 3 ) {
        if ( numdigits > 0  &&  numdashes > 0 )  return false;
        if ( numdigits > 0  ||  numdashes == 1 ) return true;
    }

    return false;
}
635 
636 
s_GetBondName(CSeqFeatData::TBond bond)637 static const string& s_GetBondName(CSeqFeatData::TBond bond)
638 {
639     static const string kOther = "unclassified";
640     return (bond == CSeqFeatData::eBond_other) ? kOther :
641         CSeqFeatData::ENUM_METHOD_NAME(EBond)()->FindName(bond, true);
642 }
643 
s_QualVectorToNote(const CFlatFeature::TQuals & qualVector,bool noRedundancy,string & note,string & punctuation,bool & addPeriod)644 static void s_QualVectorToNote(
645     const CFlatFeature::TQuals& qualVector,
646     bool noRedundancy,
647     string& note,
648     string& punctuation,
649     bool& addPeriod)
650 {
651     // is there at least one note which is more than blank or a period?
652     bool hasSubstantiveNote = false;
653     // store this so we can chop off the extra stuff we added if there was no note of substance
654     const string::size_type original_length = note.length();
655 
656     string prefix;
657     ITERATE (CFlatFeature::TQuals, it, qualVector) {
658         const string& qual = (*it)->GetValue();
659 
660         prefix.erase();
661         if ( !note.empty() ) {
662             prefix = punctuation;
663             const string& next_prefix = (*it)->GetPrefix();
664             if (!NStr::EndsWith(prefix, '\n') ) {
665                 prefix += next_prefix;
666             }
667         }
668 
669         if( !qual.empty() && qual != "." ) {
670             hasSubstantiveNote = true;
671         }
672 
673         // A qual may declare that it be shown even if redundant and override the
674         // given noRedundancy variable
675         const bool noRedundancyThisIteration =
676             ( 0 != ( (*it)->GetFlags() & CFormatQual::fFlags_showEvenIfRedund ) ? false : noRedundancy );
677         JoinString(note, prefix, qual, noRedundancyThisIteration );
678 
679         addPeriod = (*it)->GetAddPeriod();
680         punctuation = (*it)->GetSuffix();
681     }
682 
683     // if there was no meaningful note, we clear it
684     if( ! hasSubstantiveNote ) {
685         note.resize( original_length );
686     }
687 }
688 
689 
s_NoteFinalize(bool addPeriod,string & noteStr,CFlatFeature & flatFeature,ETildeStyle style=eTilde_newline)690 static void s_NoteFinalize(
691    bool addPeriod,
692    string& noteStr,
693    CFlatFeature& flatFeature,
694    ETildeStyle style = eTilde_newline ) {
695 
696     if (!noteStr.empty()) {
697         if (addPeriod  &&  !NStr::EndsWith(noteStr, ".")) {
698 
699             AddPeriod(noteStr);
700         }
701         // Policy change: expand tilde on both descriptors and features
702         ExpandTildes(noteStr, style);
703         TrimSpacesAndJunkFromEnds( noteStr, true );
704 
705         CRef<CFormatQual> note(new CFormatQual("note", noteStr));
706         flatFeature.SetQuals().push_back(note);
707     }
708 }
709 
s_GetOverlap(const CMappedFeat & feat)710 static int s_GetOverlap(const CMappedFeat& feat )
711 {
712     if (feat) {
713         int total_length = 0;
714         ITERATE( CSeq_loc, loc_iter, feat.GetLocation() ) {
715             total_length += loc_iter.GetRange().GetLength();
716         }
717         return total_length;
718     }
719     return 0;
720 }
721 
722 
723 ///
724 ///  The best protein feature is defined as the one that has the most overlap
725 ///  with the given DNA.
726 ///  If there is a tie between two protein features in overlap then the one
727 ///  with the lesser processing status is declared the winner.
728 ///
s_GetBestProtFeature(const CBioseq_Handle & seq)729 static CMappedFeat s_GetBestProtFeature(const CBioseq_Handle& seq)
730 {
731     SAnnotSelector sel(CSeqFeatData::e_Prot);
732     sel.SetLimitTSE(seq.GetTSE_Handle());
733 
734     CMappedFeat best;
735     CProt_ref::TProcessed best_processed = CProt_ref::eProcessed_transit_peptide;
736     int best_overlap = 0;
737 
738     for (CFeat_CI it(seq, sel);  it;  ++it) {
739 
740         if ( !best ) {
741 
742             best = *it;
743             best_processed = it->GetData().GetProt().GetProcessed();
744             best_overlap = s_GetOverlap(best);
745 
746         } else {
747 
748             int current_overlap = s_GetOverlap(*it);
749             CProt_ref::TProcessed current_processed = it->GetData().GetProt().GetProcessed();
750 
751             if ( best_overlap < current_overlap ) {
752 
753                 best_overlap = current_overlap;
754                 best_processed = current_processed;
755                 best = *it;
756 
757             } else if ( (best_overlap == current_overlap) && (best_processed > current_processed) ) {
758 
759                 best_processed = current_processed;
760                 best = *it;
761             }
762         }
763     }
764     return best;
765 }
766 
767 // -- FeatureHeader
768 
CFeatHeaderItem(CBioseqContext & ctx)769 CFeatHeaderItem::CFeatHeaderItem(CBioseqContext& ctx) : CFlatItem(&ctx)
770 {
771     x_GatherInfo(ctx);
772 }
773 
GetItemType(void) const774 IFlatItem::EItem CFeatHeaderItem::GetItemType(void) const
775 {
776     return eItem_FeatHeader;
777 }
778 
x_GatherInfo(CBioseqContext & ctx)779 void CFeatHeaderItem::x_GatherInfo(CBioseqContext& ctx)
780 {
781     if ( ctx.Config().IsFormatFTable() ) {
782         m_Id.Reset(ctx.GetPrimaryId());
783     }
784 }
785 
s_CheckFuzz(const CInt_fuzz & fuzz)786 static bool s_CheckFuzz(const CInt_fuzz& fuzz)
787 {
788     return !(fuzz.IsLim()  &&  fuzz.GetLim() == CInt_fuzz::eLim_unk);
789 }
790 
s_LocIsFuzz(const CMappedFeat & feat,const CSeq_loc & loc)791 static bool s_LocIsFuzz(const CMappedFeat& feat, const CSeq_loc& loc)
792 {
793     if ( feat.GetData().GetSubtype() == CSeqFeatData::eSubtype_imp  &&
794          feat.GetData().IsImp() ) {  // unmappable impfeats
795         const CSeqFeatData::TImp& imp = feat.GetData().GetImp();
796         if ( imp.IsSetLoc() ) {
797             const string& imploc = imp.GetLoc();
798             if ( imploc.find('<') != NPOS  ||  imploc.find('>') != NPOS ) {
799                 return true;
800             }
801         }
802     } else {    // any regular feature test location for fuzz
803         for ( CSeq_loc_CI it(loc, CSeq_loc_CI::eEmpty_Allow); it; ++it ) {
804             const CSeq_loc& l = it.GetEmbeddingSeq_loc();
805             switch ( l.Which() ) {
806             case CSeq_loc::e_Pnt:
807             {{
808                 if ( l.GetPnt().IsSetFuzz() ) {
809                     if ( s_CheckFuzz(l.GetPnt().GetFuzz()) ) {
810                         return true;
811                     }
812                 }
813                 break;
814             }}
815             case CSeq_loc::e_Packed_pnt:
816             {{
817                 if ( l.GetPacked_pnt().IsSetFuzz() ) {
818                     if ( s_CheckFuzz(l.GetPacked_pnt().GetFuzz()) ) {
819                         return true;
820                     }
821                 }
822                 break;
823             }}
824             case CSeq_loc::e_Int:
825             {{
826                 bool fuzz = false;
827                 if ( l.GetInt().IsSetFuzz_from() ) {
828                     fuzz = s_CheckFuzz(l.GetInt().GetFuzz_from());
829                 }
830                 if ( !fuzz  &&  l.GetInt().IsSetFuzz_to() ) {
831                     fuzz = s_CheckFuzz(l.GetInt().GetFuzz_to());
832                 }
833                 if ( fuzz ) {
834                     return true;
835                 }
836                 break;
837             }}
838             case CSeq_loc::e_Packed_int:
839             {{
840                 if ( l.GetPacked_int().IsPartialStart(eExtreme_Biological)
841                   || l.GetPacked_int().IsPartialStop(eExtreme_Biological) ) {
842                     return true;
843                 }
844                 break;
845             }}
846             case CSeq_loc::e_Null:
847             {{
848                 return true;
849             }}
850             default:
851                 break;
852             }
853         }
854     }
855 
856     return false;
857 }
858 
/// Append "name" immediately followed by "str" to output, preceded by ", "
/// when output already has content.  Nothing is appended if str is empty.
///
/// @param output  [in,out] string being built
/// @param name    label placed in front of the value
/// @param str     value; an empty value suppresses the whole entry
static void s_AddPcrPrimersQualsAppend( string &output, const string &name, const string &str )
{
    if( str.empty() ) {
        return;
    }

    if( ! output.empty() ) {
        output += ", ";
    }
    const string entry = name + str;
    output += entry;
}
868 
869 // This splits a string that's comma-separated with parens at start and end
870 // (or, string might just contain a single string, so no splitting is needed,
871 // in which case the output_vec will be of size 1)
s_SplitCommaSeparatedStringInParens(vector<string> & output_vec,const string & string_to_split)872 static void s_SplitCommaSeparatedStringInParens( vector<string> &output_vec, const string &string_to_split )
873 {
874     // nothing to do since no input
875     if( string_to_split.empty() ) {
876         return;
877     }
878 
879     // no splitting required
880     if( string_to_split[0] != '(' ) {
881         output_vec.push_back( string_to_split );
882         return;
883     }
884 
885     // if ends with closing paren, chop that off.
886     // ( It's actually a data error if we DON'T end with a ')', but we continue anyway, since
887     // we want to do the best we can with the data we get. )
888     size_t amount_to_chop_off_end = 0;
889     if( string_to_split[string_to_split.length() - 1] == ')' ) {
890         amount_to_chop_off_end = 1;
891     }
892 
893     NStr::Split( string_to_split.substr( 1, string_to_split.length() - amount_to_chop_off_end - 1), ",", output_vec, 0 );
894 }
895 
896 static const char* const sc_ValidPseudoGene[] = {
897     "allelic",
898     "processed",
899     "unitary",
900     "unknown",
901     "unprocessed"
902 };
903 typedef CStaticArraySet<const char*, PNocase> TLegalPseudoGeneText;
904 DEFINE_STATIC_ARRAY_MAP(TLegalPseudoGeneText, sc_ValidPseudoGeneText, sc_ValidPseudoGene );
905 
s_IsValidPseudoGene(objects::CFlatFileConfig::TMode mode,const string & text)906 static bool s_IsValidPseudoGene( objects::CFlatFileConfig::TMode mode, const string& text)
907 {
908     switch(mode)
909     {
910     case objects::CFlatFileConfig::eMode_Release:
911     case objects::CFlatFileConfig::eMode_Entrez:
912         return sc_ValidPseudoGeneText.find(text.c_str()) != sc_ValidPseudoGeneText.end();
913     default:
914         return ! text.empty();
915     }
916 }
917 
918 static const char* const sc_ValidExceptionText[] = {
919     "annotated by transcript or proteomic data",
920     "rearrangement required for product",
921     "reasons given in citation",
922     "RNA editing"
923 };
924 typedef CStaticArraySet<const char*, PNocase_CStr> TLegalExceptText;
925 DEFINE_STATIC_ARRAY_MAP(TLegalExceptText, sc_LegalExceptText, sc_ValidExceptionText);
926 
s_IsValidExceptionText(const string & text)927 static bool s_IsValidExceptionText(const string& text)
928 {
929     return sc_LegalExceptText.find(text.c_str()) != sc_LegalExceptText.end();
930 }
931 
932 
933 static const char* const sc_ValidRefSeqExceptionText[] = {
934     "adjusted for low-quality genome",
935     "alternative processing",
936     "alternative start codon",
937     "artificial frameshift",
938     "dicistronic gene",
939     "mismatches in transcription",
940     "mismatches in translation",
941     "modified codon recognition",
942     "nonconsensus splice site",
943     "transcribed product replaced",
944     "transcribed pseudogene",
945     "translated product replaced",
946     "unclassified transcription discrepancy",
947     "unclassified translation discrepancy",
948     "unextendable partial coding region"
949 };
950 typedef CStaticArraySet<const char*, PNocase> TLegalRefSeqExceptText;
951 DEFINE_STATIC_ARRAY_MAP(TLegalRefSeqExceptText, sc_LegalRefSeqExceptText, sc_ValidRefSeqExceptionText);
952 
s_IsValidRefSeqExceptionText(const string & text)953 static bool s_IsValidRefSeqExceptionText(const string& text)
954 {
955     return sc_LegalRefSeqExceptText.find(text.c_str()) != sc_LegalRefSeqExceptText.end();
956 }
957 
s_GetGbValue(CConstRef<CSeq_feat> feat,const string & key,string & value)958 bool s_GetGbValue( CConstRef<CSeq_feat> feat, const string& key, string& value )
959 {
960     if ( ! feat->IsSetQual() ) {
961         return false;
962     }
963     const CSeq_feat_Base::TQual & qual = feat->GetQual(); // must store reference since ITERATE macro evaluates 3rd arg multiple times
964     ITERATE( CSeq_feat::TQual, it, qual ) {
965         if (!(*it)->IsSetQual()  ||  !(*it)->IsSetVal()) {
966             continue;
967         }
968         if ( (*it)->GetQual() != key ) {
969             continue;
970         }
971         value = (*it)->GetVal();
972         return true;
973     }
974     return false;
975 }
976 
977 
//  -- CFeatureItemBase
979 
CFeatureItemBase(const CMappedFeat & feat,CBioseqContext & ctx,CRef<feature::CFeatTree> ftree,const CSeq_loc * loc,bool suppressAccession)980 CFeatureItemBase::CFeatureItemBase
981 (const CMappedFeat& feat,
982  CBioseqContext& ctx,
983  CRef<feature::CFeatTree> ftree,
984  const CSeq_loc* loc,
985  bool suppressAccession) :
986     CFlatItem(&ctx), m_Feat(feat), m_Feat_Tree(ftree), m_Loc(loc ? loc :
987                                          (feat ? &feat.GetLocation() : NULL)),
988     m_SuppressAccession(suppressAccession)
989 {
990     if (m_Feat) {
991         x_SetObject(m_Feat.GetOriginalFeature());
992 
993         CSeq_feat_Handle feat = m_Feat.GetSeq_feat_Handle();
994         const CSeq_annot_Handle& ah = feat.GetAnnot();
995         CSeq_entry_Handle seh = ah.GetParentEntry();
996         if (! seh) {
997             x_SetExternal();
998         }
999     }
1000 }
1001 
Format(void) const1002 CConstRef<CFlatFeature> CFeatureItemBase::Format(void) const
1003 {
1004     CRef<CFlatFeature> ff(new CFlatFeature(GetKey(),
1005                           *new CFlatSeqLoc(GetLoc(), *GetContext(), CFlatSeqLoc::eType_location, false, false, this->IsSuppressAccession()),
1006                           m_Feat));
1007     if ( ff ) {
1008         x_FormatQuals(*ff);
1009     }
1010     return ff;
1011 }
1012 
1013 
1014 //  -- CFeatureItem
1015 
GetKey(void) const1016 string CFeatureItem::GetKey(void) const
1017 {
1018     CBioseqContext& ctx = *GetContext();
1019 
1020     CSeqFeatData::E_Choice type = m_Feat.GetData().Which();
1021     CSeqFeatData::ESubtype subtype = m_Feat.GetData().GetSubtype();
1022 
1023     if (GetContext()->IsProt()) {   // protein
1024         if ( IsMappedFromProt()  &&  type == CSeqFeatData::e_Prot ) {
1025             if ( subtype == CSeqFeatData::eSubtype_preprotein         ||
1026                  subtype == CSeqFeatData::eSubtype_mat_peptide_aa     ||
1027                 subtype == CSeqFeatData::eSubtype_sig_peptide_aa     ||
1028                 subtype == CSeqFeatData::eSubtype_transit_peptide_aa     ||
1029                 subtype == CSeqFeatData::eSubtype_propeptide_aa ) {
1030                 return "Precursor";
1031             }
1032         }
1033         switch ( subtype ) {
1034         case CSeqFeatData::eSubtype_region:
1035             return "Region";
1036         case CSeqFeatData::eSubtype_bond:
1037             return "Bond";
1038         case CSeqFeatData::eSubtype_site:
1039             return "Site";
1040         default:
1041             break;
1042         }
1043     } else {  // nucleotide
1044         switch ( subtype ) {
1045 
1046         case CSeqFeatData::eSubtype_ncRNA:
1047             return "ncRNA";
1048 
1049         case CSeqFeatData::eSubtype_tmRNA:
1050             return "tmRNA";
1051 
1052         case CSeqFeatData::eSubtype_preprotein:
1053             if ( !ctx.IsRefSeq() ) {
1054                 return "misc_feature";
1055             }
1056             break;
1057 
1058         case CSeqFeatData::eSubtype_site:
1059         case CSeqFeatData::eSubtype_bond:
1060         case CSeqFeatData::eSubtype_region:
1061         case CSeqFeatData::eSubtype_comment:
1062             return "misc_feature";
1063 
1064         default:
1065             break;
1066         }
1067     }
1068 
1069     // deal with unmappable impfeats
1070     if (subtype == CSeqFeatData::eSubtype_imp  &&  type == CSeqFeatData::e_Imp) {
1071         const CSeqFeatData::TImp& imp = m_Feat.GetData().GetImp();
1072         if ( imp.IsSetKey() ) {
1073             return imp.GetKey();
1074         }
1075     }
1076 
1077     if (type == CSeqFeatData::e_Imp) {
1078         switch ( subtype ) {
1079         case CSeqFeatData::eSubtype_enhancer:
1080         case CSeqFeatData::eSubtype_promoter:
1081         case CSeqFeatData::eSubtype_CAAT_signal:
1082         case CSeqFeatData::eSubtype_TATA_signal:
1083         case CSeqFeatData::eSubtype_35_signal:
1084         case CSeqFeatData::eSubtype_10_signal:
1085         case CSeqFeatData::eSubtype_GC_signal:
1086         case CSeqFeatData::eSubtype_RBS:
1087         case CSeqFeatData::eSubtype_polyA_signal:
1088         case CSeqFeatData::eSubtype_attenuator:
1089         case CSeqFeatData::eSubtype_terminator:
1090         case CSeqFeatData::eSubtype_misc_signal:
1091             return "regulatory";
1092         default:
1093             break;
1094         }
1095     }
1096 
1097     return CFeatureItemBase::GetKey();
1098 }
1099 
1100 
1101 // constructor from CSeq_feat
CFeatureItem(const CMappedFeat & feat,CBioseqContext & ctx,CRef<feature::CFeatTree> ftree,const CSeq_loc * loc,EMapped mapped,bool suppressAccession,CConstRef<CFeatureItem> parentFeatureItem)1102 CFeatureItem::CFeatureItem
1103 (const CMappedFeat& feat,
1104  CBioseqContext& ctx,
1105  CRef<feature::CFeatTree> ftree,
1106  const CSeq_loc* loc,
1107  EMapped mapped,
1108  bool suppressAccession,
1109  CConstRef<CFeatureItem> parentFeatureItem) :
1110     CFeatureItemBase(feat, ctx, ftree, loc, suppressAccession), m_Mapped(mapped)
1111 {
1112     x_GatherInfoWithParent(ctx, parentFeatureItem);
1113 }
1114 
GetItemType(void) const1115 IFlatItem::EItem CFeatureItem::GetItemType(void) const
1116 {
1117     return eItem_Feature;
1118 }
1119 
x_GatherInfoWithParent(CBioseqContext & ctx,CConstRef<CFeatureItem> parentFeatureItem)1120 void CFeatureItem::x_GatherInfoWithParent(CBioseqContext& ctx, CConstRef<CFeatureItem> parentFeatureItem )
1121 {
1122     if ( s_SkipFeature(GetFeat(), GetLoc(), ctx) ) {
1123         x_SetSkip();
1124         return;
1125     }
1126     m_Type = m_Feat.GetData().GetSubtype();
1127     x_AddQuals(ctx, parentFeatureItem );
1128 }
1129 
1130 //  ----------------------------------------------------------------------------
x_AddQualPartial(CBioseqContext & ctx)1131 void CFeatureItem::x_AddQualPartial(
1132     CBioseqContext& ctx )
1133 //
1134 //  Note: /partial has been depricated since DEC-2001. Current policy is to
1135 //  suppress /partial in entrez and release modes and let it stand in gbench and
1136 //  dump modes
1137 //  ----------------------------------------------------------------------------
1138 {
1139     if ( !ctx.Config().HideUnclassPartial() ) {
1140         if ( !IsMappedFromCDNA() || !ctx.IsProt() ) {
1141             if ( m_Feat.IsSetPartial()  &&  m_Feat.GetPartial() ) {
1142                 if ( eSeqlocPartial_Complete == sequence::SeqLocPartialCheck( GetLoc(), &ctx.GetScope() ) &&
1143                     !s_LocIsFuzz( m_Feat, GetLoc() ) )
1144                 {
1145                     x_AddQual( eFQ_partial, new CFlatBoolQVal( true ) );
1146                 }
1147             }
1148         }
1149     }
1150 }
1151 
1152 //  ----------------------------------------------------------------------------
x_AddQualOperon(CBioseqContext & ctx,CSeqFeatData::ESubtype subtype)1153 void CFeatureItem::x_AddQualOperon(
1154     CBioseqContext& ctx,
1155     CSeqFeatData::ESubtype subtype )
1156 //  ----------------------------------------------------------------------------
1157 {
1158     if ( subtype == CSeqFeatData::eSubtype_operon ||
1159          subtype == CSeqFeatData::eSubtype_gap ) {
1160         return;
1161     }
1162 
1163     // bail if this type of object is not allowed to carry an operon
1164     if( ! x_IsSeqFeatDataFeatureLegal( CSeqFeatData::eQual_operon ) ) {
1165         return;
1166     }
1167 
1168     const CGene_ref* gene_ref = m_Feat.GetGeneXref();
1169     if ( gene_ref == NULL  ||  !gene_ref->IsSuppressed()) {
1170             const CSeq_loc& operon_loc = ( ctx.IsProt() || !IsMapped() ) ?
1171                 m_Feat.GetLocation() : GetLoc();
1172         CConstRef<CSeq_feat> operon
1173             = GetOverlappingOperon( operon_loc, ctx.GetScope() );
1174         if ( operon ) {
1175             const string& operon_name = operon->GetNamedQual( "operon" );
1176             if ( !operon_name.empty() ) {
1177                 x_AddQual(eFQ_operon, new CFlatStringQVal(operon_name));
1178             }
1179         }
1180     }
1181 }
1182 
1183 //  ----------------------------------------------------------------------------
x_AddQualsRegulatoryClass(CBioseqContext & ctx,CSeqFeatData::ESubtype subtype)1184 void CFeatureItem::x_AddQualsRegulatoryClass(
1185     CBioseqContext& ctx,
1186     CSeqFeatData::ESubtype subtype )
1187 //  ----------------------------------------------------------------------------
1188 {
1189     _ASSERT( m_Feat.GetData().IsImp() );
1190 
1191     switch ( subtype ) {
1192     case CSeqFeatData::eSubtype_enhancer:
1193         x_AddQual(eFQ_regulatory_class, new CFlatStringQVal("enhancer"));
1194         break;
1195     case CSeqFeatData::eSubtype_promoter:
1196         x_AddQual(eFQ_regulatory_class, new CFlatStringQVal("promoter"));
1197         break;
1198     case CSeqFeatData::eSubtype_CAAT_signal:
1199         x_AddQual(eFQ_regulatory_class, new CFlatStringQVal("CAAT_signal"));
1200         break;
1201     case CSeqFeatData::eSubtype_TATA_signal:
1202         x_AddQual(eFQ_regulatory_class, new CFlatStringQVal("TATA_box"));
1203         break;
1204     case CSeqFeatData::eSubtype_35_signal:
1205         x_AddQual(eFQ_regulatory_class, new CFlatStringQVal("minus_35_signal"));
1206         break;
1207     case CSeqFeatData::eSubtype_10_signal:
1208         x_AddQual(eFQ_regulatory_class, new CFlatStringQVal("minus_10_signal"));
1209         break;
1210     case CSeqFeatData::eSubtype_GC_signal:
1211         x_AddQual(eFQ_regulatory_class, new CFlatStringQVal("GC_signal"));
1212         break;
1213     case CSeqFeatData::eSubtype_RBS:
1214         x_AddQual(eFQ_regulatory_class, new CFlatStringQVal("ribosome_binding_site"));
1215         break;
1216     case CSeqFeatData::eSubtype_polyA_signal:
1217         x_AddQual(eFQ_regulatory_class, new CFlatStringQVal("polyA_signal_sequence"));
1218         break;
1219     case CSeqFeatData::eSubtype_attenuator:
1220         x_AddQual(eFQ_regulatory_class, new CFlatStringQVal("attenuator"));
1221         break;
1222     case CSeqFeatData::eSubtype_terminator:
1223         x_AddQual(eFQ_regulatory_class, new CFlatStringQVal("terminator"));
1224         break;
1225     case CSeqFeatData::eSubtype_misc_signal:
1226         x_AddQual(eFQ_regulatory_class, new CFlatStringQVal("other"));
1227         break;
1228     default:
1229         break;
1230     }
1231 }
1232 
1233 //  ----------------------------------------------------------------------------
x_AddQualPseudo(CBioseqContext & ctx,CSeqFeatData::E_Choice type,CSeqFeatData::ESubtype subtype,bool pseudo)1234 void CFeatureItem::x_AddQualPseudo(
1235     CBioseqContext& ctx,
1236     CSeqFeatData::E_Choice type,
1237     CSeqFeatData::ESubtype subtype,
1238     bool pseudo )
1239 //  ----------------------------------------------------------------------------
1240 {
1241     if ( !pseudo ||
1242         subtype == CSeqFeatData::eSubtype_mobile_element ||
1243         subtype == CSeqFeatData::eSubtype_centromere ||
1244         subtype == CSeqFeatData::eSubtype_telomere )
1245     {
1246         return;
1247     }
1248 
1249     if (ctx.Config().DropIllegalQuals()  &&
1250         ( type == CSeqFeatData::e_Rna || type == CSeqFeatData::e_Imp ) )
1251     {
1252         switch (subtype) {
1253             case  CSeqFeatData::eSubtype_allele:
1254             case  CSeqFeatData::eSubtype_conflict:
1255             case  CSeqFeatData::eSubtype_D_loop:
1256             case  CSeqFeatData::eSubtype_iDNA:
1257             case  CSeqFeatData::eSubtype_LTR:
1258             case  CSeqFeatData::eSubtype_misc_binding:
1259             case  CSeqFeatData::eSubtype_misc_difference:
1260             case  CSeqFeatData::eSubtype_misc_recomb:
1261             case  CSeqFeatData::eSubtype_misc_RNA:
1262             case  CSeqFeatData::eSubtype_misc_structure:
1263             case  CSeqFeatData::eSubtype_modified_base:
1264             case  CSeqFeatData::eSubtype_mutation:
1265             case  CSeqFeatData::eSubtype_old_sequence:
1266             case  CSeqFeatData::eSubtype_polyA_site:
1267             case  CSeqFeatData::eSubtype_precursor_RNA:
1268             case  CSeqFeatData::eSubtype_prim_transcript:
1269             case  CSeqFeatData::eSubtype_primer_bind:
1270             case  CSeqFeatData::eSubtype_protein_bind:
1271             case  CSeqFeatData::eSubtype_repeat_region:
1272             case  CSeqFeatData::eSubtype_repeat_unit:
1273             case  CSeqFeatData::eSubtype_rep_origin:
1274             case  CSeqFeatData::eSubtype_satellite:
1275             case  CSeqFeatData::eSubtype_stem_loop:
1276             case  CSeqFeatData::eSubtype_STS:
1277             case  CSeqFeatData::eSubtype_unsure:
1278             case  CSeqFeatData::eSubtype_variation:
1279             case  CSeqFeatData::eSubtype_3clip:
1280             case  CSeqFeatData::eSubtype_3UTR:
1281             case  CSeqFeatData::eSubtype_5clip:
1282             case  CSeqFeatData::eSubtype_5UTR:
1283                 return;
1284             default:
1285                 break;
1286         }
1287     }
1288     x_AddQual( eFQ_pseudo, new CFlatBoolQVal( true ) );
1289 }
1290 
1291 //  ----------------------------------------------------------------------------
x_AddQualSeqfeatNote(CBioseqContext & ctx)1292 void CFeatureItem::x_AddQualSeqfeatNote(CBioseqContext &ctx)
1293 //  ----------------------------------------------------------------------------
1294 {
1295     string precursor_comment;
1296     // set precursor_comment, if needed.
1297     // It's set from the feature's product's best protein's comment
1298     if( GetContext()->IsProt() && IsMappedFromProt() && m_Feat.IsSetProduct() ) {
1299         const CSeq_id* prod_id = m_Feat.GetProduct().GetId();
1300         if( prod_id != NULL ) {
1301             CBioseq_Handle prod_bioseq = GetContext()->GetScope().GetBioseqHandle(*prod_id);
1302             if( prod_bioseq ) {
1303                 CMappedFeat best_prot_feat = s_GetBestProtFeature( prod_bioseq );
1304                 if( best_prot_feat && best_prot_feat.IsSetComment() ) {
1305                     precursor_comment = best_prot_feat.GetComment() ;
1306                 }
1307             }
1308         }
1309     }
1310 
1311     if (m_Feat.IsSetComment()) {
1312         string comment = m_Feat.GetComment();
1313 
1314         TrimSpacesAndJunkFromEnds( comment, true );
1315         if ( ! comment.empty() && comment != "~" && comment != precursor_comment) {
1316             bool bAddPeriod = RemovePeriodFromEnd( comment, true );
1317             ConvertQuotes(comment);
1318             CRef<CFlatStringQVal> seqfeat_note( new CFlatStringQVal( comment ) );
1319 //            if ( bAddPeriod &&  ! x_GetStringQual(eFQ_prot_desc ) ) {
1320             // careful! Period must be removed if we have a valid eFQ_prot_desc
1321             // Examples to test some cases: AB001488, M96268
1322             if ( bAddPeriod ) {
1323                 seqfeat_note->SetAddPeriod();
1324             }
1325             x_AddQual( eFQ_seqfeat_note, seqfeat_note );
1326         }
1327     }
1328 
1329     /// also scan the annot to see if there is a comment there, if required
1330     if( ! ctx.ShowAnnotCommentAsCOMMENT() ) {
1331         if (m_Feat.GetAnnot().Seq_annot_IsSetDesc()) {
1332             ITERATE (CSeq_annot::TDesc::Tdata, it,
1333                 m_Feat.GetAnnot().Seq_annot_GetDesc().Get()) {
1334                     if ((*it)->IsComment()) {
1335                         const string & comment = (*it)->GetComment();
1336                         // certain comments require special handling
1337                         const static string ktRNAscanSE = "tRNA features were annotated by tRNAscan-SE";
1338                         if( NStr::StartsWith(comment, ktRNAscanSE, NStr::eNocase) /* && ! x_HasMethodtRNAscanSE() */ )
1339                         {
1340                             if ( m_Feat.GetData().GetSubtype() != CSeqFeatData::eSubtype_tRNA ) {
1341                                 // don't propagate tRNAscan-SE comments to irrelevant features
1342                                 continue;
1343                             }
1344                         }
1345                         string comm = comment;
1346                         TrimSpacesAndJunkFromEnds( comm, false );
1347                         RemovePeriodFromEnd( comm, true );
1348                         x_AddQual(eFQ_seqfeat_note,
1349                             new CFlatStringQVal(comm));
1350                     }
1351             }
1352         }
1353     }
1354 
1355 }
1356 
1357 //  ----------------------------------------------------------------------------
x_AddQualExpInv(CBioseqContext & ctx)1358 void CFeatureItem::x_AddQualExpInv(
1359     CBioseqContext& ctx )
1360 //  ----------------------------------------------------------------------------
1361 {
1362     if ( ! m_Feat.IsSetExp_ev() ) {
1363         return;
1364     }
1365 
1366     string value;
1367     if ( m_Feat.GetExp_ev() == CSeq_feat::eExp_ev_experimental ) {
1368         if ( ! x_GetGbValue( "experiment", value ) && ! x_GetGbValue( "inference", value ) ) {
1369             x_AddQual( eFQ_experiment, new CFlatExperimentQVal() );
1370         }
1371     }
1372     else if ( ! x_GetGbValue( "inference", value ) ) {
1373         x_AddQual(eFQ_inference, new CFlatInferenceQVal( "" ));
1374     }
1375 }
1376 
1377 static
s_TransSplicingFeatureAllowed(const CSeqFeatData & data)1378 bool s_TransSplicingFeatureAllowed(
1379     const CSeqFeatData& data )
1380 {
1381     switch( data.GetSubtype() ) {
1382         case CSeqFeatData::eSubtype_gene:
1383         case CSeqFeatData::eSubtype_cdregion:
1384         case CSeqFeatData::eSubtype_mRNA:
1385         case CSeqFeatData::eSubtype_tRNA:
1386         case CSeqFeatData::eSubtype_preRNA:
1387         case CSeqFeatData::eSubtype_otherRNA:
1388         case CSeqFeatData::eSubtype_exon:
1389         case CSeqFeatData::eSubtype_intron:
1390         case CSeqFeatData::eSubtype_3clip:
1391         case CSeqFeatData::eSubtype_3UTR:
1392         case CSeqFeatData::eSubtype_5clip:
1393         case CSeqFeatData::eSubtype_5UTR:
1394             return true;
1395         default:
1396             return false;
1397     }
1398 }
1399 
1400 //  ----------------------------------------------------------------------------
x_AddQualExceptions(CBioseqContext & ctx)1401 void CFeatureItem::x_AddQualExceptions(
1402     CBioseqContext& ctx )
1403 //
1404 //  Add any existing exception qualifiers.
1405 //  Note: These include /ribosomal_slippage and /trans-splicing as special
1406 //  cases. Also, some exceptions are listed as notes.
1407 //  ----------------------------------------------------------------------------
1408 {
1409     const CSeqFeatData& data  = m_Feat.GetData();
1410 
1411     string raw_exception;
1412 
1413     if ( ( m_Feat.IsSetExcept() && m_Feat.GetExcept() ) &&
1414         (m_Feat.IsSetExcept_text()  &&  !m_Feat.GetExcept_text().empty()) ) {
1415             raw_exception = m_Feat.GetExcept_text();
1416     }
1417     if ( raw_exception == "" ) {
1418         return;
1419     }
1420 
1421     const bool bIsRefseq = ctx.IsRefSeq();
1422     // const bool bIsRelaxed = ( ! cfg.DropIllegalQuals() );
1423     const bool bIsRelaxed = ((! ctx.Config().IsModeRelease()) && (! ctx.Config().IsModeEntrez()));
1424 
1425     list<string> exceptions;
1426     NStr::Split( raw_exception, ",", exceptions, NStr::fSplit_Tokenize );
1427 
1428     list<string> output_exceptions;
1429     list<string> output_notes;
1430     ITERATE( list<string>, it, exceptions ) {
1431         string cur = NStr::TruncateSpaces( *it );
1432         if( cur.empty() ) {
1433             continue;
1434         }
1435 
1436         //
1437         //  If exceptions are legal then it depends on the exception. Some are
1438         //  turned into their own custom qualifiers. Others are allowed to stand
1439         //  as exceptions, while others are turned into notes.
1440         //
1441         if ( s_IsValidExceptionText( cur ) ) {
1442             if( bIsRefseq || bIsRelaxed || data.IsCdregion() ) {
1443                 output_exceptions.push_back( cur );
1444             } else {
1445                 output_notes.push_back( cur );
1446             }
1447             continue;
1448         }
1449         if ( s_IsValidRefSeqExceptionText( cur ) ) {
1450             if( bIsRefseq || bIsRelaxed ) {
1451                 output_exceptions.push_back( cur );
1452             } else {
1453                 output_notes.push_back( cur );
1454             }
1455             continue;
1456         }
1457         if ( NStr::EqualNocase(cur, "ribosomal slippage") ) {
1458             if( data.IsCdregion() ) {
1459                 x_AddQual( eFQ_ribosomal_slippage, new CFlatBoolQVal( true ) );
1460             } else {
1461                 output_notes.push_back( cur );
1462             }
1463             continue;
1464         }
1465         if ( NStr::EqualNocase(cur, "trans-splicing") ) {
1466             if( s_TransSplicingFeatureAllowed( data ) ) {
1467                 x_AddQual( eFQ_trans_splicing, new CFlatBoolQVal( true ) );
1468             } else {
1469                 output_notes.push_back( cur );
1470             }
1471             continue;
1472         }
1473         if ( NStr::EqualNocase(cur, "circular RNA") ) {
1474             if( data.IsRna() ) {
1475               x_AddQual( eFQ_circular_RNA, new CFlatBoolQVal( true ) );
1476             } else {
1477                 output_notes.push_back( cur );
1478             }
1479             continue;
1480         }
1481         const bool is_cds_or_mrna = ( data.IsCdregion() ||
1482             data.GetSubtype() == CSeqFeatData::eSubtype_mRNA );
1483         if( NStr::EqualNocase(cur, "artificial location") ) {
1484             if( is_cds_or_mrna ) {
1485                 x_AddQual( eFQ_artificial_location, new CFlatBoolQVal( true ) );
1486             } else {
1487                 output_notes.push_back( cur );
1488             }
1489             continue;
1490         }
1491         if( NStr::EqualNocase(cur, "heterogeneous population sequenced") ||
1492             NStr::EqualNocase(cur, "low-quality sequence region") )
1493         {
1494             if( is_cds_or_mrna ) {
1495                 x_AddQual( eFQ_artificial_location, new CFlatStringQVal( cur ) );
1496             } else {
1497                 output_notes.push_back( cur );
1498             }
1499             continue;
1500         }
1501         else {
1502             if ( bIsRelaxed ) {
1503                 output_exceptions.push_back( cur );
1504             }
1505             else {
1506                 output_notes.push_back( cur );
1507             }
1508         }
1509     }
1510     if ( ! output_exceptions.empty() ) {
1511         string exception = NStr::Join( output_exceptions, ", " );
1512         x_AddQual(eFQ_exception, new CFlatStringQVal( exception ) );
1513     }
1514     if ( ! output_notes.empty() ) {
1515         string note = NStr::Join( output_notes, ", " );
1516         x_AddQual(eFQ_exception_note, new CFlatStringQVal( note ) );
1517     }
1518 }
1519 
1520 //  ----------------------------------------------------------------------------
x_AddQualNote(CConstRef<CSeq_feat> gene_feat)1521 void CFeatureItem::x_AddQualNote(
1522     CConstRef<CSeq_feat> gene_feat )
1523 //  ----------------------------------------------------------------------------
1524 {
1525     if ( ! gene_feat || ! gene_feat->IsSetComment() ) {
1526         return;
1527     }
1528     x_AddQual( eFQ_gene_note, new CFlatStringQVal(
1529         gene_feat->GetComment() ) );
1530 }
1531 
1532 //  ----------------------------------------------------------------------------
x_AddQualGeneXref(const CGene_ref * gene_ref,const CConstRef<CSeq_feat> & gene_feat)1533 void CFeatureItem::x_AddQualGeneXref(
1534     const CGene_ref* gene_ref,
1535     const CConstRef<CSeq_feat>& gene_feat )
1536 //  ----------------------------------------------------------------------------
1537 {
1538     const CSeqFeatData& data  = m_Feat.GetData();
1539     CSeqFeatData::E_Choice type = data.Which();
1540 
1541     if ( type == CSeqFeatData::e_Cdregion || type == CSeqFeatData::e_Rna ) {
1542         if ( ! gene_ref && gene_feat ) {
1543             gene_ref = &gene_feat->GetData().GetGene();
1544             if ( gene_ref != NULL  &&  gene_ref->IsSetDb() ) {
1545                 x_AddQual(
1546                     eFQ_gene_xref, new CFlatXrefQVal( gene_ref->GetDb() ) );
1547             } else if ( gene_feat->IsSetDbxref() ) {
1548                 x_AddQual(
1549                     eFQ_gene_xref, new CFlatXrefQVal( gene_feat->GetDbxref() ) );
1550             }
1551         }
1552     }
1553 }
1554 
1555 //  ----------------------------------------------------------------------------
x_AddQualOldLocusTag(const CBioseqContext & ctx,CConstRef<CSeq_feat> gene_feat)1556 void CFeatureItem::x_AddQualOldLocusTag(
1557     const CBioseqContext& ctx,
1558     CConstRef<CSeq_feat> gene_feat )
1559 //
1560 //  For non-gene features, add /old_locus_tag, if one exists somewhere.
1561 //  ----------------------------------------------------------------------------
1562 {
1563     if ( ! gene_feat ) {
1564         return;
1565     }
1566 
1567     if ( ctx.IsProt() ) {
1568         // skip if GenPept format and not gene or CDS
1569         const CSeqFeatData& data = m_Feat.GetData();
1570         CSeqFeatData::ESubtype subtype = data.GetSubtype();
1571         if (subtype != CSeqFeatData::eSubtype_gene && subtype != CSeqFeatData::eSubtype_cdregion) {
1572             return;
1573         }
1574     }
1575 
1576     const CSeq_feat::TQual& quals = gene_feat->GetQual();
1577     for ( size_t iPos = 0; iPos < quals.size(); ++iPos ) {
1578         CRef< CGb_qual > qual = quals[ iPos ];
1579         if ( ! qual->IsSetQual() || ! qual->IsSetVal() ) {
1580             continue;
1581         }
1582         if ( qual->GetQual() == "old_locus_tag" ) {
1583             x_AddQual(eFQ_old_locus_tag,
1584                 new CFlatStringQVal( qual->GetVal(), CFormatQual::eTrim_WhitespaceOnly ) );
1585         }
1586     }
1587 }
1588 
1589 //  ----------------------------------------------------------------------------
x_GetPseudo(const CGene_ref * gene_ref,const CSeq_feat * gene_feat) const1590 bool CFeatureItem::x_GetPseudo(
1591     const CGene_ref* gene_ref,
1592     const CSeq_feat* gene_feat ) const
1593 //  ----------------------------------------------------------------------------
1594 {
1595     const CSeqFeatData& data  = m_Feat.GetData();
1596     CSeqFeatData::E_Choice type = data.Which();
1597     CSeqFeatData::ESubtype subtype = data.GetSubtype();
1598 
1599     bool pseudo = m_Feat.IsSetPseudo() ? m_Feat.GetPseudo() : false;
1600     if ( type != CSeqFeatData::e_Gene &&
1601          subtype != CSeqFeatData::eSubtype_operon &&
1602          subtype != CSeqFeatData::eSubtype_gap )
1603     {
1604         if ( gene_feat && gene_feat->IsSetPseudo() && gene_feat->GetPseudo() ) {
1605             return true;
1606             const CGene_ref* altref = &gene_feat->GetData().GetGene();
1607             if ( altref && altref->IsSetPseudo() && altref->GetPseudo() ) {
1608                 return true;
1609             }
1610         }
1611         if ( gene_ref && gene_ref->IsSetPseudo() && gene_ref->GetPseudo() ) {
1612             return true;
1613         }
1614     }
1615     if ( type == CSeqFeatData::e_Gene ) {
1616         if ( data.GetGene().IsSetPseudo() && data.GetGene().GetPseudo() ) {
1617             return true;
1618         }
1619     }
1620     if ( type == CSeqFeatData::e_Rna ) {
1621         if ( data.GetRna().IsSetPseudo() && data.GetRna().GetPseudo() ) {
1622             return true;
1623         }
1624     }
1625     return pseudo;
1626 }
1627 
x_AddQualsIdx(CBioseqContext & ctx,CConstRef<CFeatureItem> parentFeatureItem)1628 void CFeatureItem::x_AddQualsIdx(
1629     CBioseqContext& ctx,
1630     CConstRef<CFeatureItem> parentFeatureItem )
1631 {
1632     CRef<CSeqEntryIndex> idx = ctx.GetSeqEntryIndex();
1633     if (! idx) return;
1634     CBioseq_Handle hdl = ctx.GetHandle();
1635     CRef<CBioseqIndex> bsx = idx->GetBioseqIndex (hdl);
1636     if (! bsx) return;
1637 
1638     const CSeqFeatData& data  = m_Feat.GetData();
1639     CSeqFeatData::E_Choice type = data.Which();
1640     CSeqFeatData::ESubtype subtype = data.GetSubtype();
1641 
1642     bool is_not_genbank = false;
1643     {{
1644         ITERATE( CBioseq::TId, id_iter, ctx.GetBioseqIds() ) {
1645             const CSeq_id& id = **id_iter;
1646 
1647             switch ( id.Which() ) {
1648                 case CSeq_id_Base::e_Embl:
1649                 case CSeq_id_Base::e_Ddbj:
1650                 case CSeq_id_Base::e_Tpe:
1651                 case CSeq_id_Base::e_Tpd:
1652                     is_not_genbank = true;
1653                     break;
1654                 default:
1655                     // do nothing
1656                     break;
1657             }
1658         }
1659     }}
1660 
1661     const CGene_ref* gene_ref = 0;
1662     CConstRef<CSeq_feat> gene_feat;
1663     const CGene_ref* feat_gene_xref = 0;
1664     feat_gene_xref = m_Feat.GetGeneXref();
1665     if (feat_gene_xref == 0 && parentFeatureItem) {
1666         feat_gene_xref = parentFeatureItem->GetFeat().GetGeneXref();
1667     }
1668     bool suppressed = false;
1669 
1670     const bool gene_forbidden_if_genbank =
1671         ( subtype == CSeqFeatData::eSubtype_mobile_element ||
1672           subtype == CSeqFeatData::eSubtype_centromere ||
1673           subtype == CSeqFeatData::eSubtype_telomere );
1674 
1675     if ( type == CSeqFeatData::e_Gene ) {
1676     } else if (subtype != CSeqFeatData::eSubtype_operon &&
1677                subtype != CSeqFeatData::eSubtype_gap &&
1678                (is_not_genbank || ! gene_forbidden_if_genbank)) {
1679         if (feat_gene_xref) {
1680             if (feat_gene_xref->IsSuppressed()) {
1681                 suppressed = true;
1682             }
1683         }
1684 
1685         if (feat_gene_xref && ! suppressed) {
1686             // RW-943
1687             // gene_ref = feat_gene_xref;
1688             CRef<CFeatureIndex> ft = bsx->GetFeatIndex (m_Feat);
1689             if (! ft) {
1690                 if (parentFeatureItem) {
1691                     // RW-985 fix for RW-943 dropping xrefs on sig_peptide and mat_peptide
1692                     ft = bsx->GetFeatIndex (parentFeatureItem->GetFeat());
1693                 } else {
1694                     // SF-3276 BAM94483 coded_by CDS was not getting xref'd gene
1695                     ft = bsx->GetFeatureForProduct();
1696                 }
1697             }
1698             if (ft) {
1699                 CRef<CFeatureIndex> fsx = ft->GetBestGene();
1700                 if (fsx) {
1701                     const CMappedFeat mf = fsx->GetMappedFeat();
1702                     if (mf) {
1703                         const CGene_ref* gr = 0;
1704                         CConstRef<CSeq_feat> gf;
1705                         gf = &(mf.GetMappedFeature());
1706                         gr = &(mf.GetData().GetGene());
1707                         if (gr) {
1708                             if (feat_gene_xref->IsSetLocus_tag() && gr->IsSetLocus_tag()) {
1709                                 if (feat_gene_xref->GetLocus_tag() == gr->GetLocus_tag()) {
1710                                     gene_feat = &(mf.GetMappedFeature());
1711                                     gene_ref = &(mf.GetData().GetGene());
1712                                 } else {
1713                                     // RW-985
1714                                     gene_ref = feat_gene_xref;
1715                                 }
1716                             } else if (feat_gene_xref->IsSetLocus() && gr->IsSetLocus()) {
1717                                 if (feat_gene_xref->GetLocus() == gr->GetLocus()) {
1718                                     gene_feat = &(mf.GetMappedFeature());
1719                                     gene_ref = &(mf.GetData().GetGene());
1720                                 } else {
1721                                     // RW-985
1722                                     gene_ref = feat_gene_xref;
1723                                 }
1724                             } else {
1725                                 // SF-3822 - map locus in xref to desc in gene
1726                                 gene_ref = feat_gene_xref;
1727                             }
1728                         }
1729                     }
1730                 } else {
1731                     // RW-943
1732                     gene_ref = feat_gene_xref;
1733                 }
1734             }
1735         } else if ((! feat_gene_xref || ! suppressed) &&
1736                    subtype != CSeqFeatData::eSubtype_primer_bind) {
1737             CRef<CFeatureIndex> ft;
1738             bool is_mapped = false;
1739             if (parentFeatureItem) {
1740                 ft = bsx->GetFeatIndex (parentFeatureItem->GetFeat());
1741                 if (ft) {
1742                     if (subtype == CSeqFeatData::eSubtype_preprotein         ||
1743                         subtype == CSeqFeatData::eSubtype_mat_peptide_aa     ||
1744                         subtype == CSeqFeatData::eSubtype_sig_peptide_aa     ||
1745                         subtype == CSeqFeatData::eSubtype_transit_peptide_aa     ||
1746                         subtype == CSeqFeatData::eSubtype_propeptide_aa) {
1747                         try {
1748                             if ( m_Feat.IsSetXref() ) {
1749                                 feat_gene_xref = m_Feat.GetGeneXref();
1750                                 if ( feat_gene_xref ) {
1751                                     gene_ref = feat_gene_xref;
1752                                     is_mapped = true;
1753                                 }
1754                             }
1755                             if (! is_mapped) {
1756                                 CRef<CFeatureIndex> fsx = ft->GetBestGene();
1757                                 if (fsx) {
1758                                     const CMappedFeat mf = fsx->GetMappedFeat();
1759                                     if (mf) {
1760                                         gene_feat = &(mf.GetMappedFeature());
1761                                         gene_ref = &(mf.GetData().GetGene());
1762                                         is_mapped = true;
1763                                     }
1764                                 }
1765                             }
1766                             if (! is_mapped) {
1767                                 // e.g., check sig_peptide for gene overlapping parent CDS
1768                                 CSeq_feat_Handle parent_feat_handle;
1769                                 parent_feat_handle = parentFeatureItem->GetFeat();
1770                                 CGeneFinder::GetAssociatedGeneInfo( m_Feat, ctx, m_Loc, m_GeneRef, gene_ref,
1771                                                                     gene_feat, parent_feat_handle );
1772                                 is_mapped = true;
1773                             }
1774                         } catch (CException&) {}
1775                     }
1776                 }
1777             } else {
1778                 ft = bsx->GetFeatIndex (m_Feat);
1779                 if (! ft) {
1780                     ft = bsx->GetFeatureForProduct();
1781                 }
1782             }
1783             if (ft && (! is_mapped)) {
1784                 CRef<CFeatureIndex> fsx = ft->GetBestGene();
1785                 if (fsx) {
1786                     const CMappedFeat mf = fsx->GetMappedFeat();
1787                     if (mf) {
1788                         gene_feat = &(mf.GetMappedFeature());
1789                         gene_ref = &(mf.GetData().GetGene());
1790                     }
1791                 } else if (feat_gene_xref) {
1792                     // last resort, e.g., MH013512 after first nuc-prot set
1793                     gene_ref = feat_gene_xref;
1794                 }
1795             }
1796         }
1797     }
1798 
1799     bool pseudo = x_GetPseudo(gene_ref, gene_feat );
1800 
1801     //
1802     //  Collect qualifiers that are specific to a single or just a few feature
1803     //  types:
1804     //
1805     switch ( type ) {
1806     case CSeqFeatData::e_Cdregion:
1807         x_AddQualsCdregionIdx(m_Feat, ctx, pseudo);
1808         break;
1809     case CSeqFeatData::e_Rna:
1810         x_AddQualsRna(m_Feat, ctx, pseudo);
1811         break;
1812     case CSeqFeatData::e_Prot:
1813         x_AddQualsProt(ctx, pseudo);
1814         break;
1815     case CSeqFeatData::e_Region:
1816         x_AddQualsRegion( ctx );
1817         break;
1818     case CSeqFeatData::e_Site:
1819         x_AddQualsSite( ctx );
1820         break;
1821     case CSeqFeatData::e_Bond:
1822         x_AddQualsBond( ctx );
1823         break;
1824     case CSeqFeatData::e_Psec_str:
1825         x_AddQualsPsecStr( ctx );
1826         break;
1827     case CSeqFeatData::e_Non_std_residue:
1828         x_AddQualsNonStd( ctx );
1829         break;
1830     case CSeqFeatData::e_Het:
1831         x_AddQualsHet( ctx );
1832         break;
1833     case CSeqFeatData::e_Variation:
1834         x_AddQualsVariation( ctx );
1835         break;
1836     default:
1837         break;
1838     }
1839 
1840     //
1841     //  Collect qualifiers that are common to most feature types:
1842     //
1843     x_AddQualPartial( ctx );
1844     x_AddQualDbXref( ctx );
1845     x_AddQualExt();
1846     x_AddQualExpInv( ctx );
1847     x_AddQualCitation();
1848     x_AddQualExceptions( ctx );
1849     x_AddQualNote( gene_feat );
1850     x_AddQualOldLocusTag( ctx, gene_feat );
1851     x_AddQualDb( gene_ref );
1852     x_AddQualGeneXref( gene_ref, gene_feat );
1853     if (bsx->HasOperon()) {
1854         x_AddQualOperon( ctx, subtype );
1855     }
1856     x_AddQualsGene( ctx, gene_ref, gene_feat, gene_ref ? false : gene_feat.NotEmpty() );
1857 
1858     x_AddQualPseudo( ctx, type, subtype, pseudo );
1859     x_AddQualsGb( ctx );
1860 
1861     // dynamic mapping of old features to regulatory with regulatory_class qualifier
1862     if ( type == CSeqFeatData::e_Imp ) {
1863        x_AddQualsRegulatoryClass ( ctx, subtype );
1864     }
1865 
1866     x_AddQualSeqfeatNote(ctx);
1867 
1868     // cleanup (drop illegal quals, duplicate information etc.)
1869     x_CleanQuals( gene_ref );
1870 
1871 
1872 }
1873 
1874 //  ----------------------------------------------------------------------------
x_AddQuals(CBioseqContext & ctx,CConstRef<CFeatureItem> parentFeatureItem)1875 void CFeatureItem::x_AddQuals(
1876     CBioseqContext& ctx,
1877     CConstRef<CFeatureItem> parentFeatureItem )
1878 //
1879 //  Add the various qualifiers to this feature. Top level function.
1880 //  ----------------------------------------------------------------------------
1881 {
1882 //    /**fl**/
1883     // leaving this here since it's so useful for debugging purposes.
1884     //21822,22172
1885     /* if(
1886         (GetLoc().GetStart(eExtreme_Biological) == 21821 &&
1887         GetLoc().GetStop(eExtreme_Biological) == 22171) ||
1888         (GetLoc().GetStop(eExtreme_Biological) == 21821 &&
1889         GetLoc().GetStart(eExtreme_Biological) == 22171)
1890         ) {
1891         cerr << ""; // a do-nothing statement in case we forget to comment it out
1892         } */
1893 //    /**fl**/
1894 
1895     if ( ctx.Config().IsFormatFTable() ) {
1896         x_AddFTableQuals( ctx );
1897         return;
1898     }
1899 
1900     if ( ctx.UsingSeqEntryIndex() ) {
1901         x_AddQualsIdx(ctx, parentFeatureItem);
1902         return;
1903     }
1904 
1905     // SQD-4444 : pass annot selector from the context structure
1906     m_Feat_Tree->AddGenesForFeat(m_Feat, ctx.GetAnnotSelector());
1907 
1908     //
1909     //  Collect/Compute data that will be shared between several qualifier
1910     //  collectors:
1911     //
1912     const CSeqFeatData& data  = m_Feat.GetData();
1913     CSeqFeatData::E_Choice type = data.Which();
1914     CSeqFeatData::ESubtype subtype = data.GetSubtype();
1915 //  /**fl**/>>
1916 //    if ( subtype == CSeqFeatData::eSubtype_sig_peptide_aa ||
1917 //        subtype == CSeqFeatData::eSubtype_sig_peptide )
1918 //    {
1919 //        cerr << "Break" << endl;
1920 //    }
1921 //  <</**fl**/
1922 
1923     // check if this is some kind of Genbank record (some of the logic may be a little different in that case)
1924     bool is_not_genbank = false;
1925     {{
1926         ITERATE( CBioseq::TId, id_iter, ctx.GetBioseqIds() ) {
1927             const CSeq_id& id = **id_iter;
1928 
1929             switch ( id.Which() ) {
1930                 case CSeq_id_Base::e_Embl:
1931                 case CSeq_id_Base::e_Ddbj:
1932                 case CSeq_id_Base::e_Tpe:
1933                 case CSeq_id_Base::e_Tpd:
1934                     is_not_genbank = true;
1935                     break;
1936                 default:
1937                     // do nothing
1938                     break;
1939             }
1940         }
1941     }}
1942 
1943 
1944     const CGene_ref* gene_ref = 0;
1945     CConstRef<CSeq_feat> gene_feat;
1946     const CGene_ref* feat_gene_xref = m_Feat.GetGeneXref();
1947     bool suppressed = false;
1948 
1949     const bool gene_forbidden_if_genbank =
1950         ( subtype == CSeqFeatData::eSubtype_mobile_element ||
1951           subtype == CSeqFeatData::eSubtype_centromere ||
1952           subtype == CSeqFeatData::eSubtype_telomere );
1953 
1954     if ( type == CSeqFeatData::e_Gene ) {
1955     } else if (subtype != CSeqFeatData::eSubtype_operon &&
1956                subtype != CSeqFeatData::eSubtype_gap &&
1957                (is_not_genbank || ! gene_forbidden_if_genbank)) {
1958         if (feat_gene_xref) {
1959             if (feat_gene_xref->IsSuppressed()) {
1960                 suppressed = true;
1961             }
1962         }
1963         if (feat_gene_xref && ! suppressed &&
1964             ! CGeneFinder::ResolveGeneXref(feat_gene_xref, ctx.GetTopLevelEntry())) {
1965             gene_ref = feat_gene_xref;
1966         } else if ((! feat_gene_xref || ! suppressed) &&
1967                    subtype != CSeqFeatData::eSubtype_primer_bind) {
1968 
1969             bool is_mapped = false;
1970             try {
1971                 CMappedFeat mapped_gene = ctx.GetFeatTree().GetBestGene(m_Feat);
1972                 if (mapped_gene) {
1973                     gene_feat = mapped_gene.GetOriginalSeq_feat();
1974                     gene_ref = &gene_feat->GetData().GetGene();
1975                     is_mapped = true;
1976                 }
1977             } catch (CException&) {}
1978             if (! is_mapped) {
1979                 try {
1980                     CMappedFeat mapped_gene = m_Feat_Tree->GetBestGene(m_Feat);
1981                     if (mapped_gene) {
1982                         gene_feat = mapped_gene.GetOriginalSeq_feat();
1983                         gene_ref = &gene_feat->GetData().GetGene();
1984                         is_mapped = true;
1985                     }
1986                 } catch (CException&) {}
1987             }
1988             if (! is_mapped) {
1989                 try {
1990                     // e.g., check sig_peptide for gene overlapping parent CDS
1991                     CSeq_feat_Handle parent_feat_handle;
1992                     if( parentFeatureItem ) {
1993                         parent_feat_handle = parentFeatureItem->GetFeat();
1994                         CGeneFinder::GetAssociatedGeneInfo( m_Feat, ctx, m_Loc, m_GeneRef, gene_ref,
1995                             gene_feat, parent_feat_handle );
1996                     }
1997                 } catch (CException&) {}
1998             }
1999         }
2000     }
2001 
2002     bool pseudo = x_GetPseudo(gene_ref, gene_feat );
2003 
2004     //
2005     //  Collect qualifiers that are specific to a single or just a few feature
2006     //  types:
2007     //
2008     switch ( type ) {
2009     case CSeqFeatData::e_Cdregion:
2010         x_AddQualsCdregion(m_Feat, ctx, pseudo);
2011         break;
2012     case CSeqFeatData::e_Rna:
2013         x_AddQualsRna(m_Feat, ctx, pseudo);
2014         break;
2015     case CSeqFeatData::e_Prot:
2016         x_AddQualsProt(ctx, pseudo);
2017         break;
2018     case CSeqFeatData::e_Region:
2019         x_AddQualsRegion( ctx );
2020         break;
2021     case CSeqFeatData::e_Site:
2022         x_AddQualsSite( ctx );
2023         break;
2024     case CSeqFeatData::e_Bond:
2025         x_AddQualsBond( ctx );
2026         break;
2027     case CSeqFeatData::e_Psec_str:
2028         x_AddQualsPsecStr( ctx );
2029         break;
2030     case CSeqFeatData::e_Non_std_residue:
2031         x_AddQualsNonStd( ctx );
2032         break;
2033     case CSeqFeatData::e_Het:
2034         x_AddQualsHet( ctx );
2035         break;
2036     case CSeqFeatData::e_Variation:
2037         x_AddQualsVariation( ctx );
2038         break;
2039     default:
2040         break;
2041     }
2042 
2043     //
2044     //  Collect qualifiers that are common to most feature types:
2045     //
2046     x_AddQualPartial( ctx );
2047     x_AddQualDbXref( ctx );
2048     x_AddQualExt();
2049     x_AddQualExpInv( ctx );
2050     x_AddQualCitation();
2051     x_AddQualExceptions( ctx );
2052     x_AddQualNote( gene_feat );
2053     x_AddQualOldLocusTag( ctx, gene_feat );
2054     x_AddQualDb( gene_ref );
2055     x_AddQualGeneXref( gene_ref, gene_feat );
2056     x_AddQualOperon( ctx, subtype );
2057     x_AddQualsGene( ctx, gene_ref, gene_feat, gene_ref ? false : gene_feat.NotEmpty() );
2058 
2059     x_AddQualPseudo( ctx, type, subtype, pseudo );
2060     x_AddQualsGb( ctx );
2061 
2062     // dynamic mapping of old features to regulatory with regulatory_class qualifier
2063     if ( type == CSeqFeatData::e_Imp ) {
2064        x_AddQualsRegulatoryClass ( ctx, subtype );
2065     }
2066 
2067     x_AddQualSeqfeatNote(ctx);
2068 
2069     // cleanup (drop illegal quals, duplicate information etc.)
2070     x_CleanQuals( gene_ref );
2071 }
2072 
2073 
2074 static const string s_TrnaList[] = {
2075   "tRNA-Gap",
2076   "tRNA-Ala",
2077   "tRNA-Asx",
2078   "tRNA-Cys",
2079   "tRNA-Asp",
2080   "tRNA-Glu",
2081   "tRNA-Phe",
2082   "tRNA-Gly",
2083   "tRNA-His",
2084   "tRNA-Ile",
2085   "tRNA-Xle",
2086   "tRNA-Lys",
2087   "tRNA-Leu",
2088   "tRNA-Met",
2089   "tRNA-Asn",
2090   "tRNA-Pyl",
2091   "tRNA-Pro",
2092   "tRNA-Gln",
2093   "tRNA-Arg",
2094   "tRNA-Ser",
2095   "tRNA-Thr",
2096   "tRNA-Sec",
2097   "tRNA-Val",
2098   "tRNA-Trp",
2099   "tRNA-OTHER",
2100   "tRNA-Tyr",
2101   "tRNA-Glx",
2102   "tRNA-TERM"
2103 };
2104 
2105 
s_AaName(int aa)2106 static const string& s_AaName(int aa)
2107 {
2108     int idx = 255;
2109 
2110     if (aa != '*') {
2111         idx = aa - 64;
2112     } else {
2113         idx = 27;
2114     }
2115     if ( idx > 0 && idx < ArraySize(s_TrnaList) ) {
2116         return s_TrnaList [idx];
2117     }
2118     return kEmptyStr;
2119 }
2120 
2121 
s_ToIupacaa(int aa)2122 static int s_ToIupacaa(int aa)
2123 {
2124     vector<char> n(1, static_cast<char>(aa));
2125     vector<char> i;
2126     CSeqConvert::Convert(n, CSeqUtil::e_Ncbieaa, 0, 1, i, CSeqUtil::e_Iupacaa);
2127     return i.front();
2128 }
2129 
2130 //  ----------------------------------------------------------------------------
x_AddQualsRna(const CMappedFeat & feat,CBioseqContext & ctx,bool pseudo)2131 void CFeatureItem::x_AddQualsRna(
2132     const CMappedFeat& feat,
2133     CBioseqContext& ctx,
2134     bool pseudo )
2135 //  ----------------------------------------------------------------------------
2136 {
2137 
2138     CSeqFeatData::ESubtype subtype = m_Feat.GetData().GetSubtype();
2139     const CRNA_ref& rna = feat.GetData().GetRna();
2140     const CFlatFileConfig& cfg = ctx.Config();
2141     CScope& scope = ctx.GetScope();
2142 
2143     ///
2144     /// always output transcript_id
2145     ///
2146     {{
2147         EFeatureQualifier slot =
2148             (ctx.IsRefSeq()  ||  cfg.IsModeDump()  ||  cfg.IsModeGBench()) ?
2149                 eFQ_transcript_id : eFQ_transcript_id_note;
2150         try {
2151             if (feat.IsSetProduct()) {
2152                 CConstRef<CSeq_id> sip(feat.GetProduct().GetId());
2153                 if (sip) {
2154                     CBioseq_Handle prod =
2155                         scope.GetBioseqHandleFromTSE(*sip, ctx.GetHandle());
2156                     if ( prod ) {
2157                         x_AddProductIdQuals(prod, slot);
2158                     } else {
2159                         string acc;
2160                         sip->GetLabel(&acc, CSeq_id::eBoth);
2161                         CSeq_id_Handle idh = CSeq_id_Handle::GetHandle(*sip);
2162                         CSeq_id_Handle besth = sequence::GetId(idh, scope, sequence::eGetId_Best);
2163                         if (besth) {
2164                             acc.clear();
2165                             besth.GetSeqId()->GetLabel(&acc, CSeq_id::eContent);
2166                         }
2167                         if( acc.empty() && ! cfg.DropIllegalQuals() ) {
2168                             //sure of that? doesn't look right---
2169                             x_AddQual(slot, new CFlatStringQVal(
2170                                 NStr::NumericToString(sip->GetGi()) ) );
2171                         }
2172                         if (!acc.empty()) {
2173                             if ( !cfg.DropIllegalQuals()  ||  IsValidAccession(acc)) {
2174                                 CRef<CSeq_id> acc_id(new CSeq_id(acc));
2175                                 x_AddQual(slot, new CFlatSeqIdQVal(*acc_id));
2176                             }
2177                             /*
2178                             if (! (cfg.HideGI() || cfg.IsPolicyFtp())) {
2179                                 x_AddQual(eFQ_db_xref, new CFlatSeqIdQVal(*sip, true));
2180                             }
2181                             */
2182                         }
2183                     }
2184                 }
2185             }
2186         }
2187         catch (CObjmgrUtilException&) {
2188         }
2189      }}
2190 
2191     CRNA_ref::TType rna_type = rna.IsSetType() ?
2192         rna.GetType() : CRNA_ref::eType_unknown;
2193     switch ( rna_type ) {
2194     case CRNA_ref::eType_tRNA:
2195     {
2196         if ( !pseudo  &&  ( cfg.ShowTranscript() || cfg.IsFormatGBSeq() || cfg.IsFormatINSDSeq() ) ) {
2197             CSeqVector vec(feat.GetLocation(), scope);
2198             vec.SetCoding(CBioseq_Handle::eCoding_Iupac);
2199             string transcription;
2200             vec.GetSeqData(0, vec.size(), transcription);
2201             x_AddQual(eFQ_transcription, new CFlatStringQVal(transcription));
2202         }
2203         if (rna.IsSetExt()) {
2204             const CRNA_ref::C_Ext& ext = rna.GetExt();
2205             switch (ext.Which()) {
2206             case CRNA_ref::C_Ext::e_Name:
2207             {
2208                 // amino acid could not be parsed into structured form
2209                 if (!cfg.DropIllegalQuals()) {
2210                     x_AddQual(eFQ_product,
2211                         new CFlatStringQVal(ext.GetName()));
2212                 } else {
2213                     x_AddQual(eFQ_product,
2214                         new CFlatStringQVal("tRNA-OTHER"));
2215                 }
2216                 break;
2217             }
2218             case CRNA_ref::C_Ext::e_TRNA:
2219             {
2220                 const CTrna_ext& trna = ext.GetTRNA();
2221                 int aa = 0;
2222                 if ( trna.IsSetAa()  &&  trna.GetAa().IsNcbieaa() ) {
2223                     aa = trna.GetAa().GetNcbieaa();
2224                 }
2225                 if ( cfg.IupacaaOnly() ) {
2226                     aa = s_ToIupacaa(aa);
2227                 }
2228                 const string& aa_str = s_AaName(aa);
2229                 string amino_acid_str = aa_str;
2230 
2231                 if ( !aa_str.empty() ) {
2232                     const string& ac_str = aa_str;
2233                     if (NStr::CompareNocase (ac_str, "tRNA-Met") == 0) {
2234                         for (auto& gbqual : m_Feat.GetQual()) {
2235                             if (!gbqual->IsSetQual()  ||  !gbqual->IsSetVal()) continue;
2236                             if (NStr::CompareNocase( gbqual->GetQual(), "product") != 0) continue;
2237                             if (NStr::CompareNocase (gbqual->GetVal (), "tRNA-fMet") == 0) {
2238                                 amino_acid_str = "tRNA-fMet";
2239                             }
2240                             if (NStr::CompareNocase (gbqual->GetVal (), "tRNA-iMet") == 0) {
2241                                 amino_acid_str = "tRNA-iMet";
2242                             }
2243                         }
2244                     } else if (NStr::CompareNocase (ac_str, "tRNA-Ile") == 0) {
2245                         for (auto& gbqual : m_Feat.GetQual()) {
2246                             if (!gbqual->IsSetQual()  ||  !gbqual->IsSetVal()) continue;
2247                             if (NStr::CompareNocase( gbqual->GetQual(), "product") != 0) continue;
2248                             if (NStr::CompareNocase (gbqual->GetVal (), "tRNA-Ile2") == 0) {
2249                                 amino_acid_str = "tRNA-Ile2";
2250                             }
2251                         }
2252                     }
2253                     x_AddQual(eFQ_product, new CFlatStringQVal(amino_acid_str));
2254                     if ( trna.IsSetAnticodon()  &&  !ac_str.empty() ) {
2255                         x_AddQual(eFQ_anticodon,
2256                             new CFlatAnticodonQVal(trna.GetAnticodon(),
2257                                                    ac_str.substr(5, NPOS)));
2258                     }
2259                 }
2260                 if ( trna.IsSetCodon() ) {
2261                     const string& comment =
2262                         m_Feat.IsSetComment() ? m_Feat.GetComment() : kEmptyStr;
2263                     x_AddQual(eFQ_trna_codons, new CFlatTrnaCodonsQVal(trna, comment));
2264                 }
2265                 //x_AddQual(eFQ_exception_note, new CFlatStringQVal("tRNA features were annotated by tRNAscan-SE."));
2266                 break;
2267             }
2268             default:
2269                 break;
2270             } // end of internal switch
2271         }
2272         break;
2273     }
2274     case CRNA_ref::eType_mRNA:
2275     case CRNA_ref::eType_rRNA:
2276     {
2277         if ( !pseudo  &&  ( cfg.ShowTranscript() || cfg.IsFormatGBSeq() || cfg.IsFormatINSDSeq() ) ) {
2278             CSeqVector vec(feat.GetLocation(), scope);
2279             vec.SetCoding(CBioseq_Handle::eCoding_Iupac);
2280             string transcription;
2281             vec.GetSeqData(0, vec.size(), transcription);
2282             x_AddQual(eFQ_transcription, new CFlatStringQVal(transcription));
2283         }
2284         // intentional fall through
2285     }
2286     default:
2287         switch ( subtype ) {
2288 
2289         case CSeqFeatData::eSubtype_ncRNA: {
2290             if ( ! rna.IsSetExt() ) {
2291                 break;
2292             }
2293             const CRNA_ref_Base::TExt& ext = rna.GetExt();
2294             if ( ! ext.IsGen() ) {
2295                 break;
2296             }
2297             break;
2298         }
2299         case CSeqFeatData::eSubtype_tmRNA: {
2300             if ( ! rna.IsSetExt() ) {
2301                 break;
2302             }
2303             const CRNA_ref_Base::TExt& ext = rna.GetExt();
2304             if ( ext.IsGen()  &&  ext.GetGen().IsSetQuals() ) {
2305 
2306                 const list< CRef< CRNA_qual > >& quals = ext.GetGen().GetQuals().Get();
2307                 list< CRef< CRNA_qual > >::const_iterator it = quals.begin();
2308                 for ( ; it != quals.end(); ++it ) {
2309                     if ( (*it)->IsSetQual() && (*it)->IsSetVal() ) {
2310                         if ( (*it)->GetQual() == "tag_peptide" ) {
2311                             x_AddQual( eFQ_tag_peptide,
2312                                 new CFlatStringQVal(
2313                                     (*it)->GetVal(), CFormatQual::eUnquoted ) );
2314                             break;
2315                         }
2316                     }
2317                 }
2318             }
2319             break;
2320         }
2321         case CSeqFeatData::eSubtype_misc_RNA:
2322         case CSeqFeatData::eSubtype_otherRNA: {
2323             if ( ! rna.IsSetExt() ) {
2324                 break;
2325             }
2326             const CRNA_ref_Base::TExt& ext = rna.GetExt();
2327             if ( ext.IsName() ) {
2328                 string strName = ext.GetName();
2329                 if ( strName != "misc_RNA" ) {
2330                     x_AddQual( eFQ_product, new CFlatStringQVal( strName ) );
2331                 }
2332             }
2333             break;
2334         }
2335         default:
2336             if ( rna.IsSetExt()  &&  rna.GetExt().IsName() ) {
2337                 x_AddQual( eFQ_product, new CFlatStringQVal( rna.GetExt().GetName() ) );
2338             }
2339             break;
2340         }
2341     } // end of switch
2342 
2343     // some things to extract from RNA-gen
2344     if( rna.IsSetExt() && rna.GetExt().IsGen() ) {
2345         const CRNA_gen &gen = rna.GetExt().GetGen();
2346         if ( gen.IsSetClass() ) {
2347             if (gen.IsLegalClass()) {
2348                 x_AddQual( eFQ_ncRNA_class,
2349                     new CFlatStringQVal( gen.GetClass() ) );
2350             } else {
2351                 x_AddQual( eFQ_ncRNA_class,
2352                     new CFlatStringQVal( "other" ));
2353                 x_AddQual( eFQ_seqfeat_note,
2354                     new CFlatStringQVal( gen.GetClass() ) );
2355             }
2356         }
2357 
2358         if ( gen.IsSetProduct() && ! x_HasQual(eFQ_product) ) {
2359             x_AddQual( eFQ_product,
2360                 new CFlatStringQVal( gen.GetProduct() ) );
2361         }
2362     }
2363 }
2364 
2365 //  ----------------------------------------------------------------------------
x_AddQualTranslation(CBioseq_Handle & bsh,CBioseqContext & ctx,bool pseudo)2366 void CFeatureItem::x_AddQualTranslation(
2367     CBioseq_Handle& bsh,
2368     CBioseqContext& ctx,
2369     bool pseudo )
2370 //  ----------------------------------------------------------------------------
2371 {
2372     const CFlatFileConfig& cfg = ctx.Config();
2373     CScope& scope = ctx.GetScope();
2374 
2375     if ( pseudo || cfg.NeverTranslateCDS() ) {
2376         return;
2377     }
2378 
2379     string translation;
2380     if ( cfg.AlwaysTranslateCDS() || (cfg.TranslateIfNoProduct() && !bsh) ) {
2381         CSeqTranslator::Translate(m_Feat.GetOriginalFeature(), scope,
2382                                   translation, false /* don't include stops */);
2383     }
2384     else if ( bsh ) {
2385         CSeqVector seqv = bsh.GetSeqVector();
2386         /*
2387         CSeq_data::E_Choice coding = cfg.IupacaaOnly() ?
2388             CSeq_data::e_Iupacaa : CSeq_data::e_Ncbieaa;
2389         */
2390         CSeq_data::E_Choice coding = CSeq_data::e_Ncbieaa;
2391         seqv.SetCoding( coding );
2392 
2393         try {
2394             // an exception can occur here if the specified length doesn't match the actual length.
2395             // Although I don't know of any released .asn files with this problem, it can occur
2396             // in submissions.
2397             seqv.GetSeqData( 0, seqv.size(), translation );
2398         } catch( const CException & ) {
2399             // we're unable to do the translation
2400             translation.clear();
2401         }
2402     }
2403 
2404     if (!NStr::IsBlank(translation)) {
2405         x_AddQual(eFQ_translation, new CFlatStringQVal( translation ) );
2406     }
2407 }
2408 
2409 //  ----------------------------------------------------------------------------
x_AddQualTranslationTable(const CCdregion & cdr,CBioseqContext & ctx)2410 void CFeatureItem::x_AddQualTranslationTable(
2411     const CCdregion& cdr,
2412     CBioseqContext& ctx )
2413 //  ----------------------------------------------------------------------------
2414 {
2415     if ( ! cdr.IsSetCode() ) {
2416         return;
2417     }
2418     int gcode = cdr.GetCode().GetId();
2419     if ( gcode == 255 ) {
2420         return;
2421     }
2422     if ( ctx.Config().IsFormatGBSeq() || ctx.Config().IsFormatINSDSeq() || gcode > 1 ) {
2423         x_AddQual(eFQ_transl_table, new CFlatIntQVal(gcode));
2424     }
2425 }
2426 
2427 //  ----------------------------------------------------------------------------
x_AddQualCodonStart(const CCdregion & cdr,CBioseqContext & ctx)2428 void CFeatureItem::x_AddQualCodonStart(
2429     const CCdregion& cdr,
2430     CBioseqContext& ctx )
2431 //  ----------------------------------------------------------------------------
2432 {
2433     CCdregion::TFrame frame = cdr.GetFrame();
2434     if (frame == CCdregion::eFrame_not_set)
2435         frame = CCdregion::eFrame_one;
2436 
2437     // codon_start qualifier is always shown for nucleotides and for proteins mapped
2438     // from cDNA, otherwise only when the frame is not 1.
2439     if ( !ctx.IsProt() || !IsMappedFromCDNA() || frame != CCdregion::eFrame_one ) {
2440         x_AddQual( eFQ_codon_start, new CFlatIntQVal( frame ) );
2441     }
2442 }
2443 
2444 //  ----------------------------------------------------------------------------
x_AddQualCodonStartIdx(const CCdregion & cdr,CBioseqContext & ctx,const int inset)2445 void CFeatureItem::x_AddQualCodonStartIdx(
2446     const CCdregion& cdr,
2447     CBioseqContext& ctx,
2448     const int inset )
2449 //  ----------------------------------------------------------------------------
2450 {
2451     CCdregion::TFrame frame = cdr.GetFrame();
2452     if (frame == CCdregion::eFrame_not_set) {
2453         frame = CCdregion::eFrame_one;
2454     }
2455 
2456     if (inset == 1) {
2457         if (frame == CCdregion::eFrame_one) {
2458             frame = CCdregion::eFrame_three;
2459         } else if (frame == CCdregion::eFrame_two) {
2460             frame = CCdregion::eFrame_one;
2461         } else if (frame == CCdregion::eFrame_three) {
2462             frame = CCdregion::eFrame_two;
2463         }
2464     } else if (inset == 2) {
2465         if (frame == CCdregion::eFrame_one) {
2466             frame = CCdregion::eFrame_two;
2467         } else if (frame == CCdregion::eFrame_two) {
2468             frame = CCdregion::eFrame_three;
2469         } else if (frame == CCdregion::eFrame_three) {
2470             frame = CCdregion::eFrame_one;
2471         }
2472     }
2473 
2474     // codon_start qualifier is always shown for nucleotides and for proteins mapped
2475     // from cDNA, otherwise only when the frame is not 1.
2476     if ( !ctx.IsProt() || !IsMappedFromCDNA() || frame != CCdregion::eFrame_one ) {
2477         x_AddQual( eFQ_codon_start, new CFlatIntQVal( frame ) );
2478     }
2479 }
2480 
2481 //  ----------------------------------------------------------------------------
x_AddQualTranslationException(const CCdregion & cdr,CBioseqContext & ctx)2482 void CFeatureItem::x_AddQualTranslationException(
2483     const CCdregion& cdr,
2484     CBioseqContext& ctx )
2485 //  ----------------------------------------------------------------------------
2486 {
2487      if ( !ctx.IsProt() || !IsMappedFromCDNA() ) {
2488         if ( cdr.IsSetCode_break() ) {
2489             x_AddQual( eFQ_transl_except,
2490                 new CFlatCodeBreakQVal( cdr.GetCode_break() ) );
2491         }
2492 
2493     }
2494 }
2495 
2496 //  ----------------------------------------------------------------------------
x_AddQualTranslationExceptionIdx(const CCdregion & cdr,CBioseqContext & ctx,string & tr_ex)2497 void CFeatureItem::x_AddQualTranslationExceptionIdx(
2498     const CCdregion& cdr,
2499     CBioseqContext& ctx,
2500     string& tr_ex )
2501 //  ----------------------------------------------------------------------------
2502 {
2503      if ( !ctx.IsProt() || !IsMappedFromCDNA() ) {
2504         if ( cdr.IsSetCode_break() ) {
2505             x_AddQual( eFQ_transl_except,
2506                 new CFlatCodeBreakQVal( cdr.GetCode_break() ) );
2507         } else if ( tr_ex.length() > 0 ) {
2508             x_AddQual(eFQ_seqfeat_note, new CFlatStringQVal("unprocessed translation exception: " + tr_ex));
2509         }
2510     }
2511 }
2512 
2513 //  ----------------------------------------------------------------------------
x_AddQualProteinConflict(const CCdregion & cdr,CBioseqContext & ctx)2514 void CFeatureItem::x_AddQualProteinConflict(
2515     const CCdregion& cdr,
2516     CBioseqContext& ctx )
2517 //  ----------------------------------------------------------------------------
2518 {
2519     static const string conflict_msg =
2520         "Protein sequence is in conflict with the conceptual translation";
2521 
2522     const bool conflict_set = (cdr.IsSetConflict() && cdr.GetConflict());
2523 
2524     if (conflict_set)
2525     {
2526         if (!ctx.IsProt() || !IsMappedFromCDNA()) {
2527             bool has_prot = false;
2528             if (m_Feat.IsSetProduct() && m_Feat.GetProduct().GetId() != 0) {
2529                 has_prot = (sequence::GetLength(m_Feat.GetProduct(), &ctx.GetScope()) > 0);
2530             }
2531             if (has_prot) {
2532                 x_AddQual(eFQ_prot_conflict, new CFlatStringQVal(conflict_msg));
2533             }
2534         }
2535     }
2536 }
2537 
2538 //  ----------------------------------------------------------------------------
x_AddQualCodedBy(CBioseqContext & ctx)2539 void CFeatureItem::x_AddQualCodedBy(
2540     CBioseqContext& ctx )
2541 //  ----------------------------------------------------------------------------
2542 {
2543     //if ( ctx.IsProt()  &&  IsMappedFromCDNA() ) {
2544     if ( ctx.IsProt() ) {
2545         x_AddQual( eFQ_coded_by, new CFlatSeqLocQVal( m_Feat.GetLocation() ) );
2546     }
2547 }
2548 
2549 //  ----------------------------------------------------------------------------
x_AddQualProtComment(const CBioseq_Handle & protHandle)2550 void CFeatureItem::x_AddQualProtComment(
2551     const CBioseq_Handle& protHandle )
2552 //  ----------------------------------------------------------------------------
2553 {
2554     if ( ! protHandle ) {
2555         return;
2556     }
2557     CSeqdesc_CI comm( protHandle, CSeqdesc::e_Comment, 1 );
2558     if ( comm && !comm->GetComment().empty() ) {
2559         string comment = comm->GetComment();
2560 
2561         TrimSpacesAndJunkFromEnds( comment, true );
2562         /* const bool bAddPeriod = */ RemovePeriodFromEnd( comment, true );
2563         CFlatStringQVal *commentQVal = new CFlatStringQVal( comment );
2564         /* if( bAddPeriod ) {
2565             commentQVal->SetAddPeriod();
2566         } */
2567         x_AddQual( eFQ_prot_comment, commentQVal );
2568     }
2569 }
2570 
2571 //  ----------------------------------------------------------------------------
x_AddQualProtMethod(const CBioseq_Handle & protHandle)2572 void CFeatureItem::x_AddQualProtMethod(
2573     const CBioseq_Handle& protHandle )
2574 //  ----------------------------------------------------------------------------
2575 {
2576     if ( ! protHandle ) {
2577         return;
2578     }
2579     CSeqdesc_CI mi( protHandle, CSeqdesc::e_Molinfo );
2580     if ( mi ) {
2581         CMolInfo::TTech prot_tech = mi->GetMolinfo().GetTech();
2582         if ( prot_tech >  CMolInfo::eTech_standard       &&
2583              prot_tech != CMolInfo::eTech_concept_trans  &&
2584              prot_tech != CMolInfo::eTech_concept_trans_a ) {
2585             if ( !GetTechString( prot_tech ).empty() ) {
2586                 x_AddQual( eFQ_prot_method, new CFlatStringQVal(
2587                     "Method: " + GetTechString( prot_tech) ) );
2588             }
2589         }
2590     }
2591 }
2592 
2593 //  ----------------------------------------------------------------------------
x_GetAssociatedProtInfoIdx(CBioseqContext & ctx,CBioseq_Handle & protHandle,const CProt_ref * & protRef,CMappedFeat & protFeat,CConstRef<CSeq_id> & protId)2594 void CFeatureItem::x_GetAssociatedProtInfoIdx(
2595     CBioseqContext& ctx,
2596     CBioseq_Handle& protHandle,
2597     const CProt_ref*& protRef,
2598     CMappedFeat& protFeat,
2599     CConstRef<CSeq_id>& protId )
2600 //  ----------------------------------------------------------------------------
2601 {
2602     const CFlatFileConfig& cfg = ctx.Config();
2603     CScope& scope = ctx.GetScope();
2604 
2605     protId.Reset( m_Feat.GetProduct().GetId() );
2606     if ( protId ) {
2607         if ( !cfg.AlwaysTranslateCDS() ) {
2608             CScope::EGetBioseqFlag get_flag = CScope::eGetBioseq_Loaded;
2609             if ( cfg.ShowFarTranslations() || ctx.IsGED() || ctx.IsRefSeq() || cfg.IsPolicyFtp() ) {
2610                 get_flag = CScope::eGetBioseq_All;
2611             }
2612             protHandle =  scope.GetBioseqHandle(*protId, get_flag);
2613         }
2614     }
2615 
2616     CRef<CSeqEntryIndex> idx = ctx.GetSeqEntryIndex();
2617     if (! idx) return;
2618     CBioseq_Handle hdl = ctx.GetHandle();
2619     CRef<CBioseqIndex> bsx = idx->GetBioseqIndex (hdl);
2620     if (! bsx) return;
2621 
2622 
2623     protRef = 0;
2624     if ( protHandle ) {
2625         CRef<CSeqEntryIndex> idx = ctx.GetSeqEntryIndex();
2626         if (! idx) return;
2627         CRef<CBioseqIndex> bsx = idx->GetBioseqIndex (protHandle);
2628         if (bsx) {
2629             CRef<CFeatureIndex> pfx = bsx->GetBestProteinFeature();
2630             if (pfx) {
2631                 protFeat = pfx->GetMappedFeat();
2632                 if ( protFeat ) {
2633                     protRef = &( protFeat.GetData().GetProt() );
2634                 }
2635             }
2636         } else {
2637             x_GetAssociatedProtInfo(ctx, protHandle, protRef, protFeat, protId);
2638         }
2639     }
2640 }
2641 
2642 //  ----------------------------------------------------------------------------
x_GetAssociatedProtInfo(CBioseqContext & ctx,CBioseq_Handle & protHandle,const CProt_ref * & protRef,CMappedFeat & protFeat,CConstRef<CSeq_id> & protId)2643 void CFeatureItem::x_GetAssociatedProtInfo(
2644     CBioseqContext& ctx,
2645     CBioseq_Handle& protHandle,
2646     const CProt_ref*& protRef,
2647     CMappedFeat& protFeat,
2648     CConstRef<CSeq_id>& protId )
2649 //  ----------------------------------------------------------------------------
2650 {
2651     const CFlatFileConfig& cfg = ctx.Config();
2652     CScope& scope = ctx.GetScope();
2653 
2654     protId.Reset( m_Feat.GetProduct().GetId() );
2655     if ( protId ) {
2656         if ( !cfg.AlwaysTranslateCDS() ) {
2657             CScope::EGetBioseqFlag get_flag = CScope::eGetBioseq_Loaded;
2658             if ( cfg.ShowFarTranslations() || ctx.IsGED() || ctx.IsRefSeq() || cfg.IsPolicyFtp() ) {
2659                 get_flag = CScope::eGetBioseq_All;
2660             }
2661             protHandle =  scope.GetBioseqHandle(*protId, get_flag);
2662         }
2663     }
2664 
2665     protRef = 0;
2666     if ( protHandle ) {
2667         protFeat = s_GetBestProtFeature( protHandle );
2668         if ( protFeat ) {
2669             protRef = &( protFeat.GetData().GetProt() );
2670         }
2671     }
2672 }
2673 
2674 //  ----------------------------------------------------------------------------
x_AddQualProtNote(const CProt_ref * protRef,const CMappedFeat & protFeat)2675 void CFeatureItem::x_AddQualProtNote(
2676     const CProt_ref* protRef,
2677     const CMappedFeat& protFeat )
2678 //  ----------------------------------------------------------------------------
2679 {
2680     if ( ! protRef ) {
2681         return;
2682     }
2683     if ( protFeat.IsSetComment() ) {
2684         if ( protRef->GetProcessed() == CProt_ref::eProcessed_not_set  ||
2685                 protRef->GetProcessed() == CProt_ref::eProcessed_preprotein ) {
2686             string prot_note = protFeat.GetComment();
2687             TrimSpacesAndJunkFromEnds( prot_note, true );
2688             RemovePeriodFromEnd( prot_note, true );
2689             x_AddQual( eFQ_prot_note, new CFlatStringQVal( prot_note ) );
2690         }
2691     }
2692 }
2693 
2694 
2695 //  ----------------------------------------------------------------------------
x_AddQualProteinId(CBioseqContext & ctx,const CBioseq_Handle & protHandle,CConstRef<CSeq_id> protId)2696 void CFeatureItem::x_AddQualProteinId(
2697     CBioseqContext& ctx,
2698     const CBioseq_Handle& protHandle,
2699     CConstRef<CSeq_id> protId )
2700 //  ----------------------------------------------------------------------------
2701 {
2702     if ( protHandle ) {
2703         CConstRef<CBioseq> pBioseq( protHandle.GetCompleteBioseq() );
2704 
2705         // extract the *one* usable general seq-id (if there is one)
2706         // (the loop sets pTheOneGeneralSeqId, or leaves it NULL
2707         //  if there is zero or more than one usable general seqids)
2708         CConstRef<CSeq_id> pTheOneUsableGeneralSeqId;
2709         FOR_EACH_SEQID_ON_BIOSEQ(seqid_ci, *pBioseq) {
2710             const CSeq_id & seqid = **seqid_ci;
2711             if( ! seqid.IsGeneral() ) {
2712                 // not just general, so ignore all of them
2713                 pTheOneUsableGeneralSeqId.Reset();
2714                 break;
2715             }
2716 
2717             const CDbtag & db_tag = seqid.GetGeneral();
2718 
2719             // db types to ignore
2720             static const char* const sc_IgnoredDbs[] = {
2721                 "BankIt",
2722                 "NCBIFILE",
2723                 "PID",
2724                 "SMART",
2725                 "TMSMART",
2726             };
2727             typedef CStaticArraySet<const char*, PNocase> TIgnoredDbSet;
2728             DEFINE_STATIC_ARRAY_MAP(TIgnoredDbSet, sc_IgnoredDbSet, sc_IgnoredDbs );
2729 
2730             // get db and tag
2731             const string & sDb = GET_STRING_FLD_OR_BLANK(db_tag, Db);
2732             string sTag;
2733             if( FIELD_IS_SET(db_tag, Tag) ) {
2734                 stringstream sTagStrm;
2735                 db_tag.GetTag().AsString(sTagStrm);
2736                 // swap faster than assignment
2737                 sTagStrm.str().swap(sTag);
2738             }
2739 
2740             if( ! sDb.empty() && ! sTag.empty() &&
2741                 sc_IgnoredDbSet.find(sDb.c_str()) == sc_IgnoredDbSet.end() )
2742             {
2743                 if( pTheOneUsableGeneralSeqId ) {
2744                     // more than one, so ignore all of them
2745                     pTheOneUsableGeneralSeqId.Reset();
2746                     break;
2747                 } else {
2748                     pTheOneUsableGeneralSeqId = *seqid_ci;
2749                 }
2750             }
2751         }
2752 
2753         CSeq_id::E_Choice eLastRegularChoice = CSeq_id::e_not_set;
2754         FOR_EACH_SEQID_ON_BIOSEQ(seqid_ci, *pBioseq) {
2755             const CSeq_id & seqid = **seqid_ci;
2756 
2757             switch( seqid.Which() ) {
2758             case CSeq_id::e_Genbank: case CSeq_id::e_Embl: case CSeq_id::e_Ddbj:
2759             case CSeq_id::e_Other:
2760             case CSeq_id::e_Tpg: case CSeq_id::e_Tpe: case CSeq_id::e_Tpd:
2761             case CSeq_id::e_Gpipe:
2762                 x_AddQual( eFQ_protein_id, new CFlatSeqIdQVal( seqid ) );
2763                 eLastRegularChoice = seqid.Which();
2764                 break;
2765 
2766             case CSeq_id::e_Gi:
2767                 if( seqid.GetGi() > ZERO_GI ) {
2768                     const CFlatFileConfig& cfg = GetContext()->Config();
2769                     if (! (cfg.HideGI() || cfg.IsPolicyFtp())) {
2770                         if ( eLastRegularChoice == CSeq_id::e_not_set ) {
2771                             // use as protein_id if it's the first usable one
2772                             x_AddQual( eFQ_protein_id, new CFlatSeqIdQVal( seqid ) );
2773                         }
2774                         x_AddQual( eFQ_db_xref, new CFlatSeqIdQVal( seqid, true ) );
2775                     }
2776                 }
2777                 break;
2778 
2779             case CSeq_id::e_General:
2780                 // show it if it's the *one* usable general seqid.  otherwise, ignore
2781                 if( *seqid_ci == pTheOneUsableGeneralSeqId ) {
2782                     x_AddQual( eFQ_protein_id, new CFlatSeqIdQVal( seqid ) );
2783                 }
2784                 break;
2785 
2786             default:
2787                 // ignore other types
2788                 break;
2789             }
2790         }
2791     } else if( protId ) {
2792 
2793         TGi gi = ZERO_GI;
2794         string prot_acc;
2795 
2796         // get gi and prot_acc
2797         if ( protId->IsGi() ) {
2798             gi = protId->GetGi();
2799             if( gi > ZERO_GI ) {
2800                 try {
2801                     prot_acc = GetAccessionForGi( gi, ctx.GetScope() );
2802                 } catch ( CException& ) {}
2803             }
2804         } else {
2805 
2806             // swap is faster than assignment
2807             // protId->GetSeqIdString(true).swap( prot_acc );
2808             prot_acc = protId->GetSeqIdString(true);
2809 
2810             // find prot_acc and gi
2811             //const CTextseq_id* pTextSeq_id = protId->GetTextseq_Id();
2812             //if( pTextSeq_id ) {
2813             //    stringstream protAccStrm;
2814             //    pTextSeq_id->AsFastaString(protAccStrm);
2815             //    // swap is faster than assignment
2816             //    protAccStrm.str().swap( prot_acc );
2817 
2818             //}
2819             try {
2820                 gi = ctx.GetScope().GetGi( CSeq_id_Handle::GetHandle(*protId) );
2821             } catch(CException &) {
2822                 // could not get gi
2823             }
2824         }
2825 
2826         if( ! prot_acc.empty() ) {
2827             if ( ! ctx.Config().DropIllegalQuals() || IsValidAccession( prot_acc ) ) {
2828                 try {
2829                     CRef<CSeq_id> acc_id( new CSeq_id( prot_acc ) );
2830                     x_AddQual( eFQ_protein_id, new CFlatSeqIdQVal( *acc_id ) );
2831                 } catch( CException & ) {
2832                     x_AddQual( eFQ_protein_id, new CFlatStringQVal(prot_acc) );
2833                 }
2834             }
2835         }
2836 
2837         if( gi > ZERO_GI ) {
2838             CConstRef<CSeq_id> pGiSeqId(
2839                 protId->IsGi() ?
2840                 protId.GetPointer() :
2841                 new CSeq_id(CSeq_id::e_Gi, gi) );
2842             x_AddQual( eFQ_db_xref, new CFlatSeqIdQVal( *pGiSeqId, true ) );
2843         }
2844     }
2845 }
2846 
2847 //  ----------------------------------------------------------------------------
x_AddQualCdsProduct(CBioseqContext & ctx,const CProt_ref * protRef)2848 void CFeatureItem::x_AddQualCdsProduct(
2849     CBioseqContext& ctx,
2850     const CProt_ref* protRef )
2851 //  ----------------------------------------------------------------------------
2852 {
2853     if ( !protRef ) {
2854         return;
2855     }
2856 
2857     const CFlatFileConfig& cfg = ctx.Config();
2858     const CProt_ref::TName& names = protRef->GetName();
2859     if ( !names.empty() ) {
2860         if ( ! cfg.IsModeDump() ) {
2861             x_AddQual( eFQ_cds_product,
2862                 new CFlatStringQVal( names.front() ) );
2863             if ( names.size() > 1 ) {
2864                 x_AddQual( eFQ_prot_names,
2865                     new CFlatProductNamesQVal( names, m_Gene ) );
2866             }
2867 
2868         } else {
2869             ITERATE(CProt_ref::TName, it, names) {
2870                 x_AddQual( eFQ_cds_product, new CFlatStringQVal(*it) );
2871             }
2872         }
2873     }
2874 }
2875 
2876 //  ----------------------------------------------------------------------------
x_AddQualProtDesc(const CProt_ref * protRef)2877 void CFeatureItem::x_AddQualProtDesc(
2878     const CProt_ref* protRef )
2879 //  ----------------------------------------------------------------------------
2880 {
2881     if ( !protRef || !protRef->IsSetDesc() ) {
2882         return;
2883     }
2884 
2885     string desc = protRef->GetDesc();
2886     TrimSpacesAndJunkFromEnds( desc, true );
2887     bool add_period = RemovePeriodFromEnd( desc, true );
2888     CRef<CFlatStringQVal> prot_desc( new CFlatStringQVal( desc ) );
2889     if ( add_period ) {
2890         prot_desc->SetAddPeriod();
2891     }
2892     x_AddQual( eFQ_prot_desc, prot_desc );
2893 }
2894 
2895 //  ----------------------------------------------------------------------------
x_AddQualProtActivity(const CProt_ref * protRef)2896 void CFeatureItem::x_AddQualProtActivity(
2897     const CProt_ref* protRef )
2898 //  ----------------------------------------------------------------------------
2899 {
2900     if ( !protRef || protRef->GetActivity().empty() ) {
2901         return;
2902     }
2903     ITERATE (CProt_ref::TActivity, it, protRef->GetActivity()) {
2904         x_AddQual(eFQ_prot_activity, new CFlatStringQVal(*it));
2905     }
2906 }
2907 
2908 //  ----------------------------------------------------------------------------
x_AddQualProtEcNumber(CBioseqContext & ctx,const CProt_ref * protRef)2909 void CFeatureItem::x_AddQualProtEcNumber(
2910     CBioseqContext& ctx,
2911     const CProt_ref* protRef )
2912 //  ----------------------------------------------------------------------------
2913 {
2914     if ( !protRef || !protRef->IsSetEc()  ||  protRef->GetEc().empty() ) {
2915         return;
2916     }
2917 
2918     const CFlatFileConfig& cfg = ctx.Config();
2919     ITERATE(CProt_ref::TEc, ec, protRef->GetEc()) {
2920         if ( !cfg.DropIllegalQuals()  ||  s_IsLegalECNumber( *ec ) ) {
2921             x_AddQual( eFQ_prot_EC_number, new CFlatStringQVal( *ec ) );
2922         }
2923     }
2924 }
2925 
2926 //  ----------------------------------------------------------------------------
x_AddQualsCdregionIdx(const CMappedFeat & cds,CBioseqContext & ctx,bool pseudo)2927 void CFeatureItem::x_AddQualsCdregionIdx(
2928     const CMappedFeat& cds,
2929     CBioseqContext& ctx,
2930     bool pseudo)
2931 //  ----------------------------------------------------------------------------
2932 {
2933     CRef<CSeqEntryIndex> idx = ctx.GetSeqEntryIndex();
2934     if (! idx) return;
2935     CBioseq_Handle hdl = ctx.GetHandle();
2936     CRef<CBioseqIndex> bsx = idx->GetBioseqIndex (hdl);
2937     if (! bsx) return;
2938 
2939     const CCdregion& cdr = cds.GetData().GetCdregion();
2940 
2941     // const CSeq_loc& cdsloc = cds.GetLocation();
2942     const CSeq_loc& orgloc = cds.GetOriginalFeature().GetLocation();
2943     const CSeq_loc& bsploc = ctx.GetLocation();
2944 
2945     // cerr << "CDS " << MSerial_AsnText << cdsloc;
2946     // cerr << "ORG " << MSerial_AsnText << orgloc;
2947     // cerr << "BSP " << MSerial_AsnText << bsploc;
2948 
2949     int inset = 0;
2950     if ( ! ctx.GetLocation().IsWhole()) {
2951         if (bsploc.IsInt()) {
2952             const CSeq_interval& bspint = bsploc.GetInt();
2953             if ( orgloc.IsSetStrand() && orgloc.GetStrand() == eNa_strand_minus ) {
2954                 CBioseq_Handle& hdl = ctx.GetHandle();
2955                 if (hdl) {
2956                     int pos = bspint.GetTo();
2957                     // cerr << "PS " << pos << endl;
2958                     const CSeq_id* bid = bsploc.GetId();
2959                     ENa_strand strand = eNa_strand_minus;
2960                     CSeq_id& cid = const_cast<CSeq_id&>(*bid);
2961                     CConstRef<CSeq_loc> newloc(new CSeq_loc(cid, pos, pos, strand));
2962                     // cerr << "NEW " << MSerial_AsnText << newloc;
2963                     inset = sequence::LocationOffset(orgloc, *newloc, eOffset_FromStart, &ctx.GetScope());
2964                     // cerr << "IS " << inset << endl;
2965                 }
2966             } else {
2967                 int pos = bspint.GetFrom();
2968                 // cerr << "PS " << pos << endl;
2969                 const CSeq_id* bid = bsploc.GetId();
2970                 ENa_strand strand = eNa_strand_plus;
2971                 CSeq_id& cid = const_cast<CSeq_id&>(*bid);
2972                 CConstRef<CSeq_loc> newloc(new CSeq_loc(cid, pos, pos, strand));
2973                 // cerr << "NEW " << MSerial_AsnText << newloc;
2974                 inset = sequence::LocationOffset(orgloc, *newloc, eOffset_FromStart, &ctx.GetScope());
2975                 // cerr << "IS " << inset << endl;
2976             }
2977         }
2978     }
2979     if (inset < 0) {
2980         inset = 0;
2981     }
2982     inset = (inset % 3);
2983 
2984     const CProt_ref* protRef = 0;
2985     CMappedFeat protFeat;
2986     CConstRef<CSeq_id> prot_id;
2987 
2988     string tr_ex;
2989     for (auto& gbqual : cds.GetQual()) {
2990         if (!gbqual->IsSetQual()  ||  !gbqual->IsSetVal()) continue;
2991         if (NStr::CompareNocase( gbqual->GetQual(), "transl_except") != 0) continue;
2992         tr_ex = gbqual->GetVal ();
2993         break;
2994     }
2995     TQI it = m_Quals.begin();
2996      while ( it != m_Quals.end() ) {
2997         if ( it->first == eFQ_transl_except ) {
2998             it = m_Quals.Erase(it);
2999         } else {
3000             ++it;
3001         }
3002     }
3003 
3004     x_AddQualTranslationTable( cdr, ctx );
3005     x_AddQualCodonStartIdx( cdr, ctx, inset );
3006     x_AddQualTranslationExceptionIdx( cdr, ctx, tr_ex );
3007     x_AddQualProteinConflict( cdr, ctx );
3008     x_AddQualCodedBy( ctx );
3009     if ( ctx.IsProt()  &&  IsMappedFromCDNA() ) {
3010         return;
3011     }
3012 
3013     // protein qualifiers
3014     if (m_Feat.IsSetProduct()) {
3015         CBioseq_Handle prot =
3016             ctx.GetScope().GetBioseqHandle(m_Feat.GetProductId());
3017         x_GetAssociatedProtInfoIdx( ctx, prot, protRef, protFeat, prot_id );
3018         x_AddQualProtComment( prot );
3019         x_AddQualProtMethod( prot );
3020         x_AddQualProtNote( protRef, protFeat );
3021         x_AddQualProteinId( ctx, prot, prot_id );
3022         x_AddQualTranslation( prot, ctx, pseudo );
3023     }
3024 
3025     // add qualifiers where associated xref overrides the ref:
3026     const CProt_ref* protXRef = m_Feat.GetProtXref();
3027     if ( ! protXRef ) {
3028         protXRef = protRef;
3029     }
3030     x_AddQualCdsProduct( ctx, protXRef );
3031     x_AddQualProtDesc( protXRef );
3032     x_AddQualProtActivity( protXRef );
3033     x_AddQualProtEcNumber( ctx, protXRef );
3034 }
3035 
3036 //  ----------------------------------------------------------------------------
x_AddQualsCdregion(const CMappedFeat & cds,CBioseqContext & ctx,bool pseudo)3037 void CFeatureItem::x_AddQualsCdregion(
3038     const CMappedFeat& cds,
3039     CBioseqContext& ctx,
3040     bool pseudo)
3041 //  ----------------------------------------------------------------------------
3042 {
3043     const CCdregion& cdr = cds.GetData().GetCdregion();
3044 
3045     const CProt_ref* protRef = 0;
3046     CMappedFeat protFeat;
3047     CConstRef<CSeq_id> prot_id;
3048 
3049     x_AddQualTranslationTable( cdr, ctx );
3050     x_AddQualCodonStart( cdr, ctx );
3051     x_AddQualTranslationException( cdr, ctx );
3052     x_AddQualProteinConflict( cdr, ctx );
3053     x_AddQualCodedBy( ctx );
3054     if ( ctx.IsProt()  &&  IsMappedFromCDNA() ) {
3055         return;
3056     }
3057 
3058     // protein qualifiers
3059     if (m_Feat.IsSetProduct()) {
3060         CBioseq_Handle prot =
3061             ctx.GetScope().GetBioseqHandle(m_Feat.GetProductId());
3062         x_GetAssociatedProtInfo( ctx, prot, protRef, protFeat, prot_id );
3063         x_AddQualProtComment( prot );
3064         x_AddQualProtMethod( prot );
3065         x_AddQualProtNote( protRef, protFeat );
3066         x_AddQualProteinId( ctx, prot, prot_id );
3067         x_AddQualTranslation( prot, ctx, pseudo );
3068     }
3069 
3070     // add qualifiers where associated xref overrides the ref:
3071     const CProt_ref* protXRef = m_Feat.GetProtXref();
3072     if ( ! protXRef ) {
3073         protXRef = protRef;
3074     }
3075     x_AddQualCdsProduct( ctx, protXRef );
3076     x_AddQualProtDesc( protXRef );
3077     x_AddQualProtActivity( protXRef );
3078     x_AddQualProtEcNumber( ctx, protXRef );
3079 }
3080 
s_ScoreSeqIdHandle(const CSeq_id_Handle & idh)3081 static int s_ScoreSeqIdHandle(const CSeq_id_Handle& idh)
3082 {
3083     CConstRef<CSeq_id> id = idh.GetSeqId();
3084     CRef<CSeq_id> id_non_const
3085         (const_cast<CSeq_id*>(id.GetPointer()));
3086     return CSeq_id::Score(id_non_const);
3087 }
3088 
3089 
s_FindBestIdChoice(const CBioseq_Handle::TId & ids)3090 CSeq_id_Handle s_FindBestIdChoice(const CBioseq_Handle::TId& ids)
3091 {
3092     //
3093     //  Objective:
3094     //  Find the best choice among a given subset of id types. I.e. if a certain
3095     //  id scores well but is not of a type we approve of, we still reject it.
3096     //
3097     CBestChoiceTracker< CSeq_id_Handle, int (*)(const CSeq_id_Handle&) >
3098         tracker(s_ScoreSeqIdHandle);
3099 
3100     ITERATE( CBioseq_Handle::TId, it, ids ) {
3101         switch( (*it).Which() ) {
3102             case CSeq_id::e_Genbank:
3103             case CSeq_id::e_Embl:
3104             case CSeq_id::e_Ddbj:
3105             case CSeq_id::e_Gi:
3106             case CSeq_id::e_Other:
3107             case CSeq_id::e_General:
3108             case CSeq_id::e_Tpg:
3109             case CSeq_id::e_Tpe:
3110             case CSeq_id::e_Tpd:
3111             case CSeq_id::e_Gpipe:
3112                 tracker(*it);
3113                 break;
3114             default:
3115                 break;
3116         }
3117     }
3118     return tracker.GetBestChoice();
3119 }
3120 
3121 //  ---------------------------------------------------------------------------
x_AddProductIdQuals(CBioseq_Handle & prod,EFeatureQualifier slot)3122 void CFeatureItem::x_AddProductIdQuals(
3123     CBioseq_Handle& prod,
3124     EFeatureQualifier slot)
3125 //  ---------------------------------------------------------------------------
3126 {
3127     //
3128     //  Objective (according to the C toolkit):
3129     //  We need one (and only one) /xxx_id tag. If there are multiple ids
3130     //
3131 
3132     if (!prod) {
3133         return;
3134     }
3135     const CBioseq_Handle::TId& ids = prod.GetId();
3136     if (ids.empty()) {
3137         return;
3138     }
3139 
3140     CSeq_id_Handle best = s_FindBestIdChoice(ids);
3141     if (!best) {
3142         return;
3143     }
3144     x_AddQual(slot, new CFlatSeqIdQVal(*best.GetSeqId()));
3145 
3146     if( m_Feat.GetData().IsCdregion() || ! GetContext()->IsProt() ) {
3147         const CFlatFileConfig& cfg = GetContext()->Config();
3148         ITERATE( CBioseq_Handle::TId, id_iter, ids ) {
3149             if( id_iter->IsGi() ) {
3150                 if (! (cfg.HideGI() || cfg.IsPolicyFtp())) {
3151                     x_AddQual( eFQ_db_xref,
3152                         new CFlatStringQVal("GI:" + NStr::NumericToString(id_iter->GetGi()) ));
3153                 }
3154             }
3155         }
3156     }
3157 }
3158 
3159 //  ----------------------------------------------------------------------------
x_AddQualsRegion(CBioseqContext & ctx)3160 void CFeatureItem::x_AddQualsRegion(
3161     CBioseqContext& ctx )
3162 //  ----------------------------------------------------------------------------
3163 {
3164     _ASSERT( m_Feat.GetData().IsRegion() );
3165 
3166     //cerr << MSerial_AsnText << m_Feat.GetOriginalFeature();
3167 
3168     const CSeqFeatData& data = m_Feat.GetData();
3169     const string &region = data.GetRegion();
3170     if ( region.empty() ) {
3171         return;
3172     }
3173 
3174     if ( ctx.IsProt()  &&
3175          data.GetSubtype() == CSeqFeatData::eSubtype_region )
3176     {
3177         x_AddQual(eFQ_region_name, new CFlatStringQVal(region));
3178     } else {
3179         x_AddQual(eFQ_region, new CFlatStringQVal("Region: " + region));
3180     }
3181 
3182     /// parse CDD data from the user object
3183     list< CConstRef<CUser_object> > objs;
3184     if (m_Feat.IsSetExt()) {
3185         objs.push_back(CConstRef<CUser_object>(&m_Feat.GetExt()));
3186     }
3187     if (m_Feat.IsSetExts()) {
3188         copy(m_Feat.GetExts().begin(), m_Feat.GetExts().end(),
3189              back_inserter(objs));
3190     }
3191 
3192     ITERATE (list< CConstRef<CUser_object> >, it, objs) {
3193         const CUser_object& obj = **it;
3194         bool found = false;
3195         if (obj.IsSetType()  &&
3196             obj.GetType().IsStr()  &&
3197             obj.GetType().GetStr() == "cddScoreData") {
3198             CConstRef<CUser_field> f = obj.GetFieldRef("definition");
3199             if (f) {
3200                 CUser_field_Base::C_Data::TStr definition_str = f->GetData().GetStr();
3201                 RemovePeriodFromEnd(definition_str, true);
3202                 if( ! s_StrEqualDisregardFinalPeriod(definition_str, region, NStr::eNocase) ) {
3203                     x_AddQual(eFQ_region,
3204                         new CFlatStringQVal(definition_str));
3205                     found = true;
3206                 }
3207                 break;
3208 
3209                 /**
3210                 if (ctx.IsProt()) {
3211                     if (f->GetData().GetStr() != region  ||  added_raw) {
3212                         x_AddQual(eFQ_region,
3213                                   new CFlatStringQVal(f->GetData().GetStr()));
3214                     }
3215                 } else {
3216                     x_AddQual(eFQ_region,
3217                               new CFlatStringQVal(f->GetData().GetStr()));
3218                 }
3219 
3220                 found = true;
3221                 break;
3222                 **/
3223 
3224                 /**
3225                 if (ctx.IsProt()  &&  region == f->GetData().GetStr()) {
3226                     /// skip
3227                 } else {
3228                     x_AddQual(eFQ_region,
3229                               new CFlatStringQVal(f->GetData().GetStr()));
3230                     found = true;
3231                     break;
3232                 }
3233                 **/
3234             }
3235         }
3236 
3237         if (found) {
3238             break;
3239         }
3240     }
3241 }
3242 
3243 
3244 //  ----------------------------------------------------------------------------
x_AddQualsBond(CBioseqContext & ctx)3245 void CFeatureItem::x_AddQualsBond(
3246     CBioseqContext& ctx )
3247 //  ----------------------------------------------------------------------------
3248 {
3249     _ASSERT( m_Feat.GetData().IsBond() );
3250 
3251     const CSeqFeatData& data = m_Feat.GetData();
3252     const string& bond = s_GetBondName( data.GetBond() );
3253     if ( NStr::IsBlank( bond ) ) {
3254         return;
3255     }
3256 
3257     if ( ctx.IsGenbankFormat()  &&  ctx.IsProt() ) {
3258         x_AddQual( eFQ_bond_type, new CFlatStringQVal( bond ) );
3259     } else {
3260         x_AddQual( eFQ_bond, new CFlatBondQVal( bond ) );
3261     }
3262 }
3263 
3264 //  ----------------------------------------------------------------------------
x_AddQualsPsecStr(CBioseqContext & ctx)3265 void CFeatureItem::x_AddQualsPsecStr(
3266     CBioseqContext& ctx )
3267 //  ----------------------------------------------------------------------------
3268 {
3269     _ASSERT( m_Feat.GetData().IsPsec_str() );
3270 
3271     const CSeqFeatData& data = m_Feat.GetData();
3272 
3273     CSeqFeatData_Base::TPsec_str sec_str_type = data.GetPsec_str();
3274 
3275     string sec_str_as_str = CSeqFeatData_Base::GetTypeInfo_enum_EPsec_str()->FindName( sec_str_type, true );
3276     x_AddQual( eFQ_sec_str_type, new CFlatStringQVal( sec_str_as_str ) );
3277 }
3278 
3279 //  ----------------------------------------------------------------------------
x_AddQualsNonStd(CBioseqContext & ctx)3280 void CFeatureItem::x_AddQualsNonStd(
3281     CBioseqContext& ctx )
3282 //  ----------------------------------------------------------------------------
3283 {
3284     _ASSERT( m_Feat.GetData().IsNon_std_residue() );
3285 
3286     const CSeqFeatData& data = m_Feat.GetData();
3287 
3288     CSeqFeatData_Base::TNon_std_residue n_s_res = data.GetNon_std_residue();
3289 
3290     x_AddQual( eFQ_non_std_residue, new CFlatStringQVal( n_s_res ) );
3291 }
3292 
3293 //  ----------------------------------------------------------------------------
x_AddQualsHet(CBioseqContext & ctx)3294 void CFeatureItem::x_AddQualsHet(
3295     CBioseqContext& ctx )
3296 //  ----------------------------------------------------------------------------
3297 {
3298     _ASSERT( m_Feat.GetData().IsHet() );
3299 
3300     const CSeqFeatData& data = m_Feat.GetData();
3301 
3302     CSeqFeatData_Base::THet het = data.GetHet();
3303 
3304     x_AddQual( eFQ_heterogen, new CFlatStringQVal( het.Get() ) );
3305 }
3306 
3307 //  ----------------------------------------------------------------------------
x_AddQualsVariation(CBioseqContext & ctx)3308 void CFeatureItem::x_AddQualsVariation(
3309     CBioseqContext& ctx )
3310 //  ----------------------------------------------------------------------------
3311 {
3312     _ASSERT( m_Feat.GetData().IsVariation() );
3313 
3314     const CSeqFeatData& data = m_Feat.GetData();
3315     const CSeqFeatData_Base::TVariation& variation = data.GetVariation();
3316 
3317     // Make the /db_xref qual
3318     if( variation.CanGetId() ) {
3319         const CVariation_ref_Base::TId& dbt = variation.GetId();
3320         // the id tag is quite specific (e.g. db must be "dbSNP", etc.) or it won't print
3321         if ( dbt.IsSetDb()  &&  !dbt.GetDb().empty()  &&
3322                 dbt.IsSetTag() && dbt.GetTag().IsStr() ) {
3323             const string &oid_str = dbt.GetTag().GetStr();
3324             if( dbt.GetDb() == "dbSNP" && NStr::StartsWith(oid_str, "rs" ) ) {
3325                 x_AddQual(eFQ_db_xref,  new CFlatStringQVal( dbt.GetDb() + ":" + oid_str.substr( 2 ) ) );
3326             }
3327         }
3328     }
3329 
3330     // Make the /replace quals:
3331     if( variation.CanGetData() && variation.GetData().IsInstance() &&
3332             variation.GetData().GetInstance().CanGetDelta() ) {
3333         const CVariation_inst_Base::TDelta& delta = variation.GetData().GetInstance().GetDelta();
3334         ITERATE( CVariation_inst_Base::TDelta, delta_iter, delta ) {
3335             if( *delta_iter && (*delta_iter)->CanGetSeq() ) {
3336                 const CDelta_item_Base::TSeq& seq = (*delta_iter)->GetSeq();
3337                 if( seq.IsLiteral() && seq.GetLiteral().CanGetSeq_data() ) {
3338                     const CDelta_item_Base::C_Seq::TLiteral& seq_literal = seq.GetLiteral();
3339                     const CSeq_literal_Base::TSeq_data& seq_data = seq_literal.GetSeq_data();
3340 
3341                     // convert the data to the standard a,c,g,t
3342                     CSeq_data iupacna_seq_data;
3343                     CSeqportUtil::Convert( seq_data,
3344                         &iupacna_seq_data,
3345                         CSeq_data::e_Iupacna );
3346                     string nucleotides = iupacna_seq_data.GetIupacna().Get();
3347 
3348                     // if the specified length and the length of the data conflict,
3349                     // use the smaller
3350                     const string::size_type max_len_allowed = seq_literal.GetLength();
3351                     if( nucleotides.size() > max_len_allowed ) {
3352                         nucleotides.resize( max_len_allowed );
3353                     }
3354 
3355                     NStr::ToLower( nucleotides );
3356 
3357                     if (!NStr::IsBlank(nucleotides)) {
3358                         x_AddQual(eFQ_replace, new CFlatStringQVal(nucleotides));
3359                     }
3360                 }
3361             }
3362         }
3363     }
3364 }
3365 
s_GetSiteName(CSeqFeatData::TSite site)3366 static const string& s_GetSiteName(CSeqFeatData::TSite site)
3367 {
3368     static const string kOther = "other";
3369     static const string kDnaBinding = "DNA binding";
3370     static const string kInhibit = "inhibition";
3371 
3372     switch (site) {
3373     case CSeqFeatData::eSite_other:
3374         return kOther;
3375     case CSeqFeatData::eSite_dna_binding:
3376         return kDnaBinding;
3377     case CSeqFeatData::eSite_inhibit:
3378         return kInhibit;
3379 
3380     default:
3381         return CSeqFeatData::ENUM_METHOD_NAME(ESite)()->FindName(site, true);
3382     }
3383 }
3384 
3385 //  ----------------------------------------------------------------------------
x_AddQualsSite(CBioseqContext & ctx)3386 void CFeatureItem::x_AddQualsSite(
3387     CBioseqContext& ctx )
3388 //  ----------------------------------------------------------------------------
3389 {
3390     _ASSERT( m_Feat.GetData().IsSite() );
3391 
3392     const CSeqFeatData& data = m_Feat.GetData();
3393     CSeqFeatData::TSite site = data.GetSite();
3394     const string& site_name = s_GetSiteName( site );
3395 
3396     // ID-4627 : site_type qualifier is needed for GBSeq/INSDSeq XMl too
3397     if ( (ctx.Config().IsFormatGenbank() ||
3398           ctx.Config().IsFormatGBSeq() ||
3399           ctx.Config().IsFormatINSDSeq()) &&  ctx.IsProt() ) {
3400         x_AddQual(eFQ_site_type, new CFlatSiteQVal( site_name ) );
3401     } else {
3402         if ( !m_Feat.IsSetComment() ||
3403             ( NStr::Find( m_Feat.GetComment(), site_name ) == NPOS ) ) {
3404             x_AddQual( eFQ_site, new CFlatSiteQVal( site_name ) );
3405         }
3406     }
3407 }
3408 
3409 //  ----------------------------------------------------------------------------
x_AddQualsExt(const CUser_field & field,const CSeq_feat::TExt & ext)3410 void CFeatureItem::x_AddQualsExt(
3411     const CUser_field& field, const CSeq_feat::TExt& ext )
3412 //  ----------------------------------------------------------------------------
3413 {
3414     if ( field.IsSetLabel()  &&  field.GetLabel().IsStr() ) {
3415         const string& oid = field.GetLabel().GetStr();
3416         if ( oid == "ModelEvidence" ) {
3417             FOR_EACH_GBQUAL_ON_SEQFEAT (gbq_itr, m_Feat) {
3418                 const CGb_qual& gbq = **gbq_itr;
3419                 if (gbq.IsSetQual()) {
3420                     if (NStr::Equal (gbq.GetQual(), "experiment")) return;
3421                 }
3422             }
3423             x_AddQual(eFQ_modelev, new CFlatModelEvQVal(ext));
3424         } else if ( oid == "Process" || oid == "Component" || oid == "Function" ) {
3425             x_AddGoQuals(field);
3426         }
3427     }
3428 }
3429 
3430 //  ----------------------------------------------------------------------------
x_AddQualsExt(const CSeq_feat::TExt & ext)3431 void CFeatureItem::x_AddQualsExt(
3432     const CSeq_feat::TExt& ext )
3433 //  ----------------------------------------------------------------------------
3434 {
3435     ITERATE (CUser_object::TData, it, ext.GetData()) {
3436         const CUser_field& field = **it;
3437         if ( !field.IsSetData() ) {
3438             continue;
3439         }
3440         if ( field.GetData().IsObject() ) {
3441             const CUser_object& obj = field.GetData().GetObject();
3442             x_AddQualsExt(obj);
3443         } else if ( field.GetData().IsObjects() ) {
3444             ITERATE (CUser_field::C_Data::TObjects, o, field.GetData().GetObjects()) {
3445                 x_AddQualsExt(**o);
3446             }
3447           } else if ( field.GetData().IsFields() ) {
3448               ITERATE (CUser_field::C_Data::TFields, o, field.GetData().GetFields()) {
3449                   // x_AddGoQuals(**o);
3450                   x_AddQualsExt(**o, ext);
3451               }
3452         }
3453     }
3454     if ( ext.IsSetType()  &&  ext.GetType().IsStr() ) {
3455         const string& oid = ext.GetType().GetStr();
3456         if ( oid == "ModelEvidence" ) {
3457             FOR_EACH_GBQUAL_ON_SEQFEAT (gbq_itr, m_Feat) {
3458                 const CGb_qual& gbq = **gbq_itr;
3459                 if (gbq.IsSetQual()) {
3460                     if (NStr::Equal (gbq.GetQual(), "experiment")) return;
3461                 }
3462             }
3463             x_AddQual(eFQ_modelev, new CFlatModelEvQVal(ext));
3464         } else if ( oid == "GeneOntology" ) {
3465             x_AddGoQuals(ext);
3466         }
3467     }
3468 }
3469 
3470 //  ----------------------------------------------------------------------------
x_AddQualDbXref(CBioseqContext & ctx)3471 void CFeatureItem::x_AddQualDbXref(
3472     CBioseqContext& ctx )
3473 //  ----------------------------------------------------------------------------
3474 {
3475     if ( m_Feat.IsSetProduct()  &&
3476         ( !m_Feat.GetData().IsCdregion()  &&  ctx.IsProt() && ! IsMappedFromProt() ) ) {
3477         CBioseq_Handle prod =
3478             ctx.GetScope().GetBioseqHandle( m_Feat.GetProductId() );
3479         if ( prod ) {
3480             const CBioseq_Handle::TId& ids = prod.GetId();
3481             if ( ! ids.empty() ) {
3482                 ITERATE (CBioseq_Handle::TId, it, ids) {
3483                     if ( it->Which() != CSeq_id::e_Gi ) {
3484                         continue;
3485                     }
3486                     CConstRef<CSeq_id> id = it->GetSeqId();
3487                     if (!id->IsGeneral()) {
3488                         x_AddQual(eFQ_db_xref, new CFlatSeqIdQVal(*id, id->IsGi()));
3489                     }
3490                 }
3491             }
3492         }
3493     }
3494     if ( ! m_Feat.IsSetDbxref() ) {
3495         return ;
3496     }
3497     x_AddQual( eFQ_db_xref, new CFlatXrefQVal( m_Feat.GetDbxref(), &m_Quals ) );
3498 }
3499 
3500 //  ----------------------------------------------------------------------------
x_AddGoQuals(const CUser_field & field)3501 void CFeatureItem::x_AddGoQuals(
3502     const CUser_field& field )
3503 //  ----------------------------------------------------------------------------
3504 {
3505     if ( field.IsSetLabel()  &&  field.GetLabel().IsStr() ) {
3506         const string& label = field.GetLabel().GetStr();
3507         EFeatureQualifier slot = eFQ_none;
3508         if ( label == "Process" ) {
3509             slot = eFQ_go_process;
3510         } else if ( label == "Component" ) {
3511             slot = eFQ_go_component;
3512         } else if ( label == "Function" ) {
3513             slot = eFQ_go_function;
3514         }
3515         if ( slot == eFQ_none ) {
3516             return;
3517         }
3518 
3519         ITERATE (CUser_field::TData::TFields, it, field.GetData().GetFields()) {
3520             if ( (*it)->GetData().IsFields() ) {
3521                 CRef<CFlatGoQVal> go_val( new CFlatGoQVal(**it) );
3522 
3523                 bool okay_to_add = true;
3524 
3525                 // check for dups
3526                 CFeatureItem::TQCI iter = x_GetQual(slot);
3527                 for ( ; iter != m_Quals.end()  &&  iter->first == slot; ++iter) {
3528                     const CFlatGoQVal & qual = dynamic_cast<const CFlatGoQVal &>( *iter->second );
3529                     if( qual.Equals(*go_val) )
3530                     {
3531                         okay_to_add = false;
3532                         break;
3533                     }
3534                 }
3535 
3536                 if( okay_to_add ) {
3537                     x_AddQual(slot, go_val);
3538                 }
3539             }
3540         }
3541     }
3542 }
3543 
3544 //  ----------------------------------------------------------------------------
x_AddGoQuals(const CUser_object & uo)3545 void CFeatureItem::x_AddGoQuals(
3546     const CUser_object& uo )
3547 //  ----------------------------------------------------------------------------
3548 {
3549     ITERATE (CUser_object::TData, uf_it, uo.GetData()) {
3550         const CUser_field& field = **uf_it;
3551         if ( field.IsSetLabel()  &&  field.GetLabel().IsStr() ) {
3552             const string& label = field.GetLabel().GetStr();
3553             EFeatureQualifier slot = eFQ_none;
3554             if ( label == "Process" ) {
3555                 slot = eFQ_go_process;
3556             } else if ( label == "Component" ) {
3557                 slot = eFQ_go_component;
3558             } else if ( label == "Function" ) {
3559                 slot = eFQ_go_function;
3560             }
3561             if ( slot == eFQ_none ) {
3562                 continue;
3563             }
3564 
3565             ITERATE (CUser_field::TData::TFields, it, field.GetData().GetFields()) {
3566                 if ( (*it)->GetData().IsFields() ) {
3567                     CRef<CFlatGoQVal> go_val( new CFlatGoQVal(**it) );
3568 
3569                     bool okay_to_add = true;
3570 
3571                     // check for dups
3572                     CFeatureItem::TQCI iter = x_GetQual(slot);
3573                     for ( ; iter != m_Quals.end()  &&  iter->first == slot; ++iter) {
3574                         const CFlatGoQVal & qual = dynamic_cast<const CFlatGoQVal &>( *iter->second );
3575                         if( qual.Equals(*go_val) )
3576                         {
3577                             okay_to_add = false;
3578                             break;
3579                         }
3580                     }
3581 
3582                     if( okay_to_add ) {
3583                         x_AddQual(slot, go_val);
3584                     }
3585                 }
3586             }
3587         }
3588     }
3589 }
3590 
3591 //  ----------------------------------------------------------------------------
x_AddQualsGene(const CBioseqContext & ctx,const CGene_ref * gene_ref,CConstRef<CSeq_feat> & gene_feat,bool from_overlap)3592 void CFeatureItem::x_AddQualsGene(
3593     const CBioseqContext& ctx,
3594     const CGene_ref* gene_ref,
3595     CConstRef<CSeq_feat>& gene_feat,
3596     bool from_overlap )
3597 //  ----------------------------------------------------------------------------
3598 {
3599     const CSeqFeatData& data = m_Feat.GetData();
3600     CSeqFeatData::ESubtype subtype = data.GetSubtype();
3601 
3602     if ( m_Feat.GetData().Which() == CSeqFeatData::e_Gene ) {
3603         gene_ref = &( m_Feat.GetData().GetGene() );
3604     }
3605     if ( ! gene_ref && gene_feat ) {
3606         gene_ref = & gene_feat->GetData().GetGene();
3607     }
3608 
3609     if ( ! gene_ref || gene_ref->IsSuppressed() ) {
3610         return;
3611     }
3612 
3613     const bool is_gene = (subtype == CSeqFeatData::eSubtype_gene);
3614 
3615     const bool okay_to_propage = (subtype != CSeqFeatData::eSubtype_mobile_element &&
3616                                   subtype != CSeqFeatData::eSubtype_centromere &&
3617                                   subtype != CSeqFeatData::eSubtype_telomere);
3618 
3619     const string* locus = (gene_ref->IsSetLocus()  &&  !NStr::IsBlank(gene_ref->GetLocus())) ?
3620         &gene_ref->GetLocus() : NULL;
3621     const string* desc = (gene_ref->IsSetDesc() &&  !NStr::IsBlank(gene_ref->GetDesc())) ?
3622         &gene_ref->GetDesc() : NULL;
3623     const TGeneSyn* syn = (gene_ref->IsSetSyn()  &&  !gene_ref->GetSyn().empty()) ?
3624         &gene_ref->GetSyn() : NULL;
3625     const string* locus_tag =
3626         (gene_ref->IsSetLocus_tag()  &&  !NStr::IsBlank(gene_ref->GetLocus_tag())) ?
3627         &gene_ref->GetLocus_tag() : 0;
3628 
3629     if ( ctx.IsProt() ) {
3630         // skip if GenPept format and not gene or CDS
3631         if (subtype != CSeqFeatData::eSubtype_gene && subtype != CSeqFeatData::eSubtype_cdregion) {
3632             return;
3633         }
3634     }
3635 
3636     //  gene:
3637     if ( !from_overlap  ||  okay_to_propage ) {
3638         if ( locus != 0 ) {
3639             m_Gene = *locus;
3640         }
3641         else if ( ( desc != 0 ) && okay_to_propage ) {
3642             m_Gene = *desc;
3643         }
3644         else if (syn != NULL) {
3645             CGene_ref::TSyn syns = *syn;
3646             m_Gene = syns.front();
3647         }
3648         if( !m_Gene.empty() ) {
3649             // we suppress the /gene qual when there's no locus but there is a locus tag (imitates C toolkit)
3650             if ( NULL != locus || NULL == locus_tag ) {
3651                 x_AddQual(eFQ_gene, new CFlatGeneQVal(m_Gene));
3652             }
3653         }
3654     }
3655 
3656     //  locus tag:
3657     if ( gene_ref  ||  okay_to_propage ) {
3658         if (locus != NULL) {
3659             if (locus_tag != NULL) {
3660                 x_AddQual(eFQ_locus_tag, new CFlatStringQVal(*locus_tag, CFormatQual::eTrim_WhitespaceOnly));
3661             }
3662         }
3663         else if (locus_tag != NULL) {
3664             x_AddQual(eFQ_locus_tag, new CFlatStringQVal(*locus_tag, CFormatQual::eTrim_WhitespaceOnly));
3665         }
3666     }
3667 
3668     //  gene desc:
3669     if ( gene_ref  ||  okay_to_propage ) {
3670         if (locus != NULL) {
3671             if (is_gene  &&  desc != NULL) {
3672                 string desc_cleaned = *desc;
3673                 RemovePeriodFromEnd( desc_cleaned, true );
3674                 x_AddQual(eFQ_gene_desc, new CFlatStringQVal(desc_cleaned));
3675             }
3676         }
3677         else if (locus_tag != NULL) {
3678             if (is_gene  &&  desc != NULL) {
3679                 x_AddQual(eFQ_gene_desc, new CFlatStringQVal(*desc));
3680             }
3681         }
3682     }
3683 
3684     //  gene syn:
3685     if ( gene_ref  ||  okay_to_propage ) {
3686         if (locus != NULL) {
3687             if (syn != NULL) {
3688                 x_AddQual(eFQ_gene_syn, new CFlatGeneSynonymsQVal(*syn));
3689             }
3690         } else if (locus_tag != NULL) {
3691             if (syn != NULL) {
3692                 x_AddQual(eFQ_gene_syn, new CFlatGeneSynonymsQVal(*syn));
3693             }
3694         } else if (desc != NULL) {
3695             if (syn != NULL) {
3696                 x_AddQual(eFQ_gene_syn, new CFlatGeneSynonymsQVal(*syn));
3697             }
3698         } else if (syn != NULL) {
3699             CGene_ref::TSyn syns = *syn;
3700             syns.pop_front();
3701             // ... and the rest as synonyms
3702             if (syn != NULL) {
3703                 x_AddQual(eFQ_gene_syn, new CFlatGeneSynonymsQVal(syns));
3704             }
3705         }
3706     }
3707 
3708     // gene nomenclature
3709     if( gene_ref->IsSetFormal_name() && subtype == CSeqFeatData::eSubtype_gene ) {
3710         x_AddQual( eFQ_nomenclature, new CFlatNomenclatureQVal(gene_ref->GetFormal_name()) );
3711     }
3712 
3713     // gene allele:
3714     {{
3715         // these bool vars just break up the if-statement to make it easier to understand
3716         const bool is_type_where_allele_from_gene_forbidden = (subtype == CSeqFeatData::eSubtype_variation);
3717         const bool is_type_where_allele_from_gene_forbidden_except_with_embl_or_ddbj =
3718             ( subtype == CSeqFeatData::eSubtype_mobile_element ||
3719               subtype == CSeqFeatData::eSubtype_centromere ||
3720               subtype == CSeqFeatData::eSubtype_telomere );
3721         const bool is_embl_or_ddbj = ( GetContext()->IsEMBL() || GetContext()->IsDDBJ() );
3722         if ( ! is_type_where_allele_from_gene_forbidden &&
3723              ( is_embl_or_ddbj || ! is_type_where_allele_from_gene_forbidden_except_with_embl_or_ddbj ) )
3724         {
3725             if (gene_ref->IsSetAllele()  &&  !NStr::IsBlank(gene_ref->GetAllele())) {
3726                 x_AddQual(eFQ_gene_allele, new CFlatStringQVal(gene_ref->GetAllele(),
3727                     CFormatQual::eTrim_WhitespaceOnly));
3728             }
3729         }
3730     }}
3731 
3732     //  gene xref:
3733     if (gene_ref->IsSetDb()) {
3734         x_AddQual(eFQ_gene_xref, new CFlatXrefQVal(gene_ref->GetDb()));
3735     }
3736 
3737     //  gene db-xref:
3738     switch (m_Feat.GetData().Which()) {
3739     case CSeqFeatData::e_Rna:
3740     case CSeqFeatData::e_Cdregion:
3741         if (gene_feat  &&  gene_feat->IsSetDbxref()) {
3742             CSeq_feat::TDbxref xrefs = gene_feat->GetDbxref();
3743             if (m_Feat.IsSetDbxref()) {
3744                 ITERATE (CSeq_feat::TDbxref, it, m_Feat.GetDbxref()) {
3745                     for (CSeq_feat::TDbxref::iterator i = xrefs.begin();
3746                          i != xrefs.end();  ++i) {
3747                         if ((*i)->Equals(**it)) {
3748                             xrefs.erase(i);
3749                             break;
3750                         }
3751                     }
3752                 }
3753             }
3754             if (xrefs.size()) {
3755                 x_AddQual(eFQ_db_xref, new CFlatXrefQVal(xrefs));
3756             }
3757         }
3758         break;
3759 
3760     default:
3761         break;
3762     }
3763 
3764     //  gene map:
3765     if (!from_overlap  &&  gene_ref->IsSetMaploc() && subtype == CSeqFeatData::eSubtype_gene) {
3766         x_AddQual(eFQ_gene_map, new CFlatStringQVal(gene_ref->GetMaploc()));
3767     }
3768 
3769     // gene pseudogene qual:
3770 
3771     // inherit pseudogene, if possible
3772     if( gene_feat && ! x_HasQual(eFQ_pseudogene) ) {
3773         const string & strPseudoGene = gene_feat->GetNamedQual("pseudogene");
3774         x_AddQual(eFQ_pseudogene, new CFlatStringQVal(strPseudoGene) );
3775     }
3776 }
3777 
3778 //  ----------------------------------------------------------------------------
x_AddQualsProt(CBioseqContext & ctx,bool pseudo)3779 void CFeatureItem::x_AddQualsProt(
3780     CBioseqContext& ctx,
3781     bool pseudo)
3782 //  ----------------------------------------------------------------------------
3783 {
3784     _ASSERT( m_Feat.GetData().IsProt() );
3785 
3786     const CSeqFeatData& data = m_Feat.GetData();
3787     const CProt_ref& pref = data.GetProt();
3788     CProt_ref::TProcessed processed = pref.GetProcessed();
3789 
3790     //cerr << MSerial_AsnText << m_Feat.GetOriginalFeature();
3791 
3792     if ( ctx.IsNuc()  ||  (ctx.IsProt()  &&  !IsMappedFromProt()) ) {
3793         if ( pref.IsSetName()  &&  !pref.GetName().empty() ) {
3794             const CProt_ref::TName& names = pref.GetName();
3795             x_AddQual(eFQ_product, new CFlatStringQVal(names.front()));
3796             if (names.size() > 1) {
3797                 x_AddQual(eFQ_prot_names, new CFlatProductNamesQVal(names, m_Gene));
3798             }
3799         }
3800         if ( pref.IsSetDesc()  &&  !pref.GetDesc().empty() ) {
3801             if ( !ctx.IsProt() ) {
3802                 string desc = pref.GetDesc();
3803                 TrimSpacesAndJunkFromEnds(desc, true);
3804                 bool add_period = RemovePeriodFromEnd(desc, true);
3805                 CRef<CFlatStringQVal> prot_desc(new CFlatStringQVal(desc));
3806                 if (add_period) {
3807                     prot_desc->SetAddPeriod();
3808                 }
3809                 x_AddQual(eFQ_prot_desc, prot_desc);
3810 //                had_prot_desc = true;
3811             } else {
3812                 x_AddQual(eFQ_prot_name, new CFlatStringQVal(pref.GetDesc()));
3813             }
3814         }
3815         if ( pref.IsSetActivity()  &&  !pref.GetActivity().empty() ) {
3816             ITERATE (CProt_ref::TActivity, it, pref.GetActivity()) {
3817                 if (!NStr::IsBlank(*it)) {
3818                     x_AddQual(eFQ_prot_activity, new CFlatStringQVal(*it));
3819                 }
3820             }
3821         }
3822         if (pref.IsSetEc()  &&  !pref.GetEc().empty()) {
3823             ITERATE(CProt_ref::TEc, ec, pref.GetEc()) {
3824                 if ( !ctx.Config().DropIllegalQuals() ||  s_IsLegalECNumber(*ec)) {
3825                     x_AddQual(eFQ_prot_EC_number, new CFlatStringQVal(*ec));
3826                 }
3827             }
3828         }
3829         if ( m_Feat.IsSetProduct() ) {
3830             CBioseq_Handle prot =
3831                 ctx.GetScope().GetBioseqHandle( m_Feat.GetProductId() );
3832             if ( prot ) {
3833                 x_AddProductIdQuals(prot, eFQ_protein_id);
3834             } else {
3835                 try {
3836                     const CSeq_id& prod_id =
3837                         GetId( m_Feat.GetProduct(), &ctx.GetScope());
3838                     if ( ctx.IsRefSeq()  ||  !ctx.Config().ForGBRelease() ) {
3839                         x_AddQual(eFQ_protein_id, new CFlatSeqIdQVal(prod_id));
3840                     }
3841                 } catch (CObjmgrUtilException&) {}
3842             }
3843         }
3844     } else { // protein feature on subpeptide bioseq
3845         x_AddQual(eFQ_derived_from, new CFlatSeqLocQVal(m_Feat.GetLocation()));
3846     }
3847     if ( !pseudo  &&  ( ctx.Config().ShowPeptides() || ctx.Config().IsFormatGBSeq() || ctx.Config().IsFormatINSDSeq() ) ) {
3848         if ( processed == CProt_ref::eProcessed_mature          ||
3849              processed == CProt_ref::eProcessed_signal_peptide  ||
3850              processed == CProt_ref::eProcessed_transit_peptide  ||
3851              processed == CProt_ref::eProcessed_propeptide ) {
3852             CSeqVector pep(m_Feat.GetLocation(), ctx.GetScope());
3853             pep.SetCoding(CSeq_data::e_Ncbieaa);
3854             string peptide;
3855             pep.GetSeqData(pep.begin(), pep.end(), peptide);
3856             if (!NStr::IsBlank(peptide)) {
3857                 x_AddQual(eFQ_peptide, new CFlatStringQVal(peptide));
3858             }
3859         }
3860     }
3861 
3862     ///
3863     /// report molecular weights
3864     ///
3865     if (ctx.IsProt() && ( ctx.IsRefSeq() || ctx.Config().IsFormatGBSeq() || ctx.Config().IsFormatINSDSeq() ) && ! IsMappedFromProt() &&
3866         ! ( m_Feat.IsSetPartial() && m_Feat.GetPartial() ) &&
3867         ! ( m_Feat.GetLocation().IsPartialStart(eExtreme_Biological) ||
3868             m_Feat.GetLocation().IsPartialStop(eExtreme_Biological)) &&
3869         ! pseudo )
3870     {
3871         double wt = 0;
3872         bool has_mat_peptide = false;
3873         bool has_propeptide = false;
3874         bool has_signal_peptide = false;
3875 
3876         CConstRef<CSeq_loc> loc(&m_Feat.GetLocation());
3877 
3878         const bool is_pept_whole_loc = loc->IsWhole() ||
3879             ( loc->GetStart(eExtreme_Biological) == 0 &&
3880               loc->GetStop(eExtreme_Biological) == (ctx.GetHandle().GetBioseqLength() - 1) );
3881 
3882         if (processed == CProt_ref::eProcessed_not_set ||
3883                 processed == CProt_ref::eProcessed_preprotein )
3884         {
3885             SAnnotSelector sel = ctx.SetAnnotSelector();
3886             sel.SetFeatType(CSeqFeatData::e_Prot);
3887             for (CFeat_CI feat_it(ctx.GetHandle(), sel);  feat_it;  ++feat_it) {
3888                 bool copy_loc = false;
3889                 switch (feat_it->GetData().GetProt().GetProcessed()) {
3890                 case CProt_ref::eProcessed_signal_peptide:
3891                 case CProt_ref::eProcessed_transit_peptide:
3892                     {{
3893                          has_signal_peptide = true;
3894                          if ( (feat_it->GetLocation().GetTotalRange().GetFrom() ==
3895                                m_Feat.GetLocation().GetTotalRange().GetFrom()) &&
3896                                ! feat_it->GetLocation().Equals( m_Feat.GetLocation() ) ) {
3897                              loc = loc->Subtract(feat_it->GetLocation(),
3898                                                  CSeq_loc::fSortAndMerge_All,
3899                                                  NULL, NULL);
3900                          }
3901                      }}
3902                     break;
3903 
3904                 case CProt_ref::eProcessed_mature:
3905                     has_mat_peptide = true;
3906                     break;
3907 
3908                 case CProt_ref::eProcessed_propeptide:
3909                     has_propeptide = true;
3910                     break;
3911 
3912                 default:
3913                     break;
3914                 }
3915 
3916                 if (copy_loc) {
3917                     /// we need to adjust our location to the end of the signal
3918                     /// peptide
3919                     CRef<CSeq_loc> l(new CSeq_loc);
3920                     loc = l;
3921                     l->Assign(m_Feat.GetLocation());
3922                     l->SetInt().SetTo
3923                         (feat_it->GetLocation().GetTotalRange().GetTo());
3924                 }
3925             }
3926         }
3927 
3928         /**
3929         CMolInfo::TCompleteness comp = CMolInfo::eCompleteness_partial;
3930         {{
3931              CConstRef<CMolInfo> molinfo
3932                  (sequence::GetMolInfo(ctx.GetHandle()));
3933              if (molinfo) {
3934                  comp = molinfo->GetCompleteness();
3935              }
3936          }}
3937          **/
3938 
3939         if ( !(loc->IsPartialStart(eExtreme_Biological) || loc->IsPartialStop(eExtreme_Biological)) ) {
3940 
3941             bool proteinIsAtLeastMature;
3942             switch( pref.GetProcessed() ) {
3943                 case CProt_ref::eProcessed_not_set:
3944                 case CProt_ref::eProcessed_preprotein:
3945                     proteinIsAtLeastMature = false;
3946                     break;
3947                 default:
3948                     proteinIsAtLeastMature = true;
3949                     break;
3950             }
3951 
3952             if ( (!has_mat_peptide  ||  !has_signal_peptide  ||  !has_propeptide) || (proteinIsAtLeastMature) || (!is_pept_whole_loc) ) {
3953                 try {
3954                     const TGetProteinWeight flags = 0;
3955                     wt = GetProteinWeight(m_Feat.GetOriginalFeature(),
3956                                           ctx.GetScope(), loc, flags);
3957                 }
3958                 catch (CException&) {
3959                 }
3960             }
3961         }
3962 
3963         /// note: we report the weight rounded to the nearest int
3964         if (wt) {
3965             x_AddQual(eFQ_calculated_mol_wt,
3966                       new CFlatIntQVal((int(wt + 0.5))));
3967         }
3968     }
3969 
3970     // cleanup
3971     if ( processed == CProt_ref::eProcessed_signal_peptide  ||
3972          processed == CProt_ref::eProcessed_transit_peptide ) {
3973         if ( !ctx.IsRefSeq() ) {
3974            // Only RefSeq allows product on signal or transit peptide
3975            x_RemoveQuals(eFQ_product);
3976         }
3977     }
3978     if ( processed == CProt_ref::eProcessed_preprotein  &&
3979          !ctx.IsRefSeq()  &&  !ctx.IsProt()  &&
3980          data.GetSubtype() == CSeqFeatData::eSubtype_preprotein ) {
3981         const CFlatStringQVal* product = x_GetStringQual(eFQ_product);
3982         if (product != NULL) {
3983             x_AddQual(eFQ_encodes, new CFlatStringQVal("encodes " + product->GetValue()));
3984             x_RemoveQuals(eFQ_product);
3985         }
3986     }
3987 }
3988 
3989 
s_ParseParentQual(const CGb_qual & gbqual,list<string> & vals)3990 static void s_ParseParentQual(const CGb_qual& gbqual, list<string>& vals)
3991 {
3992     vals.clear();
3993 
3994     if (!gbqual.IsSetVal()  || NStr::IsBlank(gbqual.GetVal())) {
3995         return;
3996     }
3997 
3998     const string& val = gbqual.GetVal();
3999 
4000     if (val.length() > 1  &&  NStr::StartsWith(val, '(')  &&
4001         NStr::EndsWith(val, ')')  && val.find(',') != NPOS) {
4002         NStr::Split(val, "(,)", vals, NStr::fSplit_Tokenize);
4003     } else {
4004         vals.push_back(val);
4005     }
4006 
4007     list<string>::iterator it = vals.begin();
4008     while (it != vals.end()) {
4009         if (NStr::IsBlank(*it)) {
4010             it = vals.erase(it);
4011         } else {
4012             ConvertQuotes(*it);
4013             ExpandTildes(*it, eTilde_space);
4014             ++it;
4015         }
4016     }
4017 }
4018 
4019 
4020 struct SLegalImport {
4021     const char*       m_Name;
4022     EFeatureQualifier m_Value;
4023 
operator stringSLegalImport4024     operator string(void) const { return m_Name; }
4025 };
4026 
4027 
s_IsValidDirection(const string & direction)4028 static bool s_IsValidDirection(const string& direction) {
4029     return NStr::EqualNocase(direction, "LEFT")   ||
4030            NStr::EqualNocase(direction, "RIGHT")  ||
4031            NStr::EqualNocase(direction, "BOTH");
4032 }
4033 
4034 
s_IsValidnConsSplice(const string & cons_splice)4035 static bool s_IsValidnConsSplice(const string& cons_splice) {
4036     return NStr::EqualNocase(cons_splice, "(5'site:YES, 3'site:YES)")     ||
4037            NStr::EqualNocase(cons_splice, "(5'site:YES, 3'site:NO)")      ||
4038            NStr::EqualNocase(cons_splice, "(5'site:YES, 3'site:ABSENT)")  ||
4039            NStr::EqualNocase(cons_splice, "(5'site:NO, 3'site:YES)")      ||
4040            NStr::EqualNocase(cons_splice, "(5'site:NO, 3'site:NO)")       ||
4041            NStr::EqualNocase(cons_splice, "(5'site:NO, 3'site:ABSENT)")   ||
4042            NStr::EqualNocase(cons_splice, "(5'site:ABSENT, 3'site:YES)")  ||
4043            NStr::EqualNocase(cons_splice, "(5'site:ABSENT, 3'site:NO)")   ||
4044            NStr::EqualNocase(cons_splice, "(5'site:ABSENT, 3'site:ABSENT)");
4045 }
4046 
4047 // currently just converts PMIDs into links
4048 static void
s_HTMLizeExperimentQual(string & out_new_val,const string & val)4049 s_HTMLizeExperimentQual( string &out_new_val, const string &val)
4050 {
4051     static const string kPmid("PMID:");
4052 
4053     // just to make sure
4054     out_new_val.clear();
4055 
4056     // str_pos should generally be considered as holding the first position
4057     // in val that we have not yet processed and copied to out_new_val.
4058     SIZE_TYPE str_pos = 0;
4059     while( str_pos < val.length() ) {
4060 
4061         // find next "PMID:" to process
4062         const SIZE_TYPE pmid_label_pos = val.find( "PMID:", str_pos );
4063         if( pmid_label_pos == NPOS ) {
4064             // no more PMIDs left.
4065             // copy the rest of the string and let's leave
4066             copy( val.begin() + str_pos, val.end(), back_inserter(out_new_val) );
4067             return;
4068         }
4069 
4070         // copy val up to just after "PMID:"
4071         const SIZE_TYPE first_pmid_pos = pmid_label_pos + kPmid.length();
4072         copy( val.begin() + str_pos, val.begin() + first_pmid_pos, back_inserter(out_new_val) );
4073         str_pos = first_pmid_pos;
4074 
4075         // push pmids (with links) onto the output
4076         // we consider the pmids to be numbers separated by one or more spaces and/or commas.
4077         bool first_num = true;
4078         while( str_pos < val.length() ) {
4079             // skip spaces and commas before pmid
4080             const SIZE_TYPE next_pmid_pos = val.find_first_not_of(" ,", str_pos);
4081             if( next_pmid_pos == NPOS || ! isdigit(val[next_pmid_pos]) ) {
4082                 break;
4083             }
4084 
4085             // find end of pmid
4086             SIZE_TYPE end_of_pmid_pos = val.find_first_not_of("0123456789", next_pmid_pos );
4087             if( NPOS == end_of_pmid_pos ) {
4088                 end_of_pmid_pos = val.length();
4089             }
4090 
4091             // extract the actual pmid
4092             string pmid = val.substr(next_pmid_pos, end_of_pmid_pos - next_pmid_pos );
4093 
4094             // write pmid with link
4095             if( ! first_num ) {
4096                 out_new_val += ',';
4097             }
4098             out_new_val += "<a href=\"";
4099             out_new_val += strLinkBasePubmed;
4100             out_new_val += pmid;
4101             out_new_val += "\">";
4102             out_new_val += pmid;
4103             out_new_val += "</a>";
4104             str_pos = end_of_pmid_pos;
4105 
4106             first_num = false;
4107         }
4108     }
4109 }
4110 
// Imports the GenBank qualifiers stored on the feature (m_Feat.GetQual())
// into this item's qualifier set via x_AddQual().
//
// Each qualifier name is looked up, ignoring case, in kLegalImportMap:
//   - a recognized name selects the matching eFQ_* slot;
//   - an unrecognized name is skipped when ctx.Config().CheckQualSyntax()
//     is set, and otherwise handled as eFQ_illegal_qual.
// Entries lacking a qual name or a value are skipped, as are empty values
// except for /replace and /pseudogene.  Most slots are stored as plain
// string values; the switch below lists the slots needing special handling.
// /replace values are collected and added in sorted order after the loop.
// Finally, when ctx.Config().HideSpecificGeneMaps() is set and no /map
// qualifier exists, one of cyt_map / gen_map / rad_map is copied to eFQ_map
// and all three are removed.
//
// Precondition (asserted): m_Feat.IsSetQual().
//
// @param ctx  bioseq context supplying the configuration flags consulted
//  ----------------------------------------------------------------------------
void CFeatureItem::x_ImportQuals(
    CBioseqContext& ctx )
//  ----------------------------------------------------------------------------
{
    _ASSERT(m_Feat.IsSetQual());

    typedef SStaticPair<const char*, EFeatureQualifier> TLegalImport;
    static const TLegalImport kLegalImports[] = {
        // Must be in case-insensitive alphabetical order!
#define DO_IMPORT(x) { #x, eFQ_##x }
        DO_IMPORT(allele),
        DO_IMPORT(bound_moiety),
        DO_IMPORT(circular_RNA),
        DO_IMPORT(clone),
        DO_IMPORT(codon),
        DO_IMPORT(compare),
        DO_IMPORT(cons_splice),
        DO_IMPORT(cyt_map),
        DO_IMPORT(direction),
        DO_IMPORT(EC_number),
        DO_IMPORT(estimated_length),
        DO_IMPORT(evidence),
        DO_IMPORT(experiment),
        DO_IMPORT(frequency),
        DO_IMPORT(function),
        DO_IMPORT(gap_type),
        DO_IMPORT(gen_map),
        DO_IMPORT(inference),
        DO_IMPORT(insertion_seq),
        DO_IMPORT(label),
        DO_IMPORT(linkage_evidence),
        DO_IMPORT(map),
        DO_IMPORT(mobile_element),
        DO_IMPORT(mobile_element_type),
        DO_IMPORT(mod_base),
        DO_IMPORT(ncRNA_class),
        DO_IMPORT(number),
        DO_IMPORT(old_locus_tag),
        DO_IMPORT(operon),
        DO_IMPORT(organism),
        DO_IMPORT(PCR_conditions),
        DO_IMPORT(phenotype),
        DO_IMPORT(product),
        DO_IMPORT(pseudogene),
        DO_IMPORT(rad_map),
        DO_IMPORT(recombination_class),
        DO_IMPORT(regulatory_class),
        DO_IMPORT(replace),
        DO_IMPORT(ribosomal_slippage),
        DO_IMPORT(rpt_family),
        DO_IMPORT(rpt_type),
        DO_IMPORT(rpt_unit),
        DO_IMPORT(rpt_unit_range),
        DO_IMPORT(rpt_unit_seq),
        DO_IMPORT(satellite),
        DO_IMPORT(standard_name),
        DO_IMPORT(tag_peptide),
        DO_IMPORT(trans_splicing),
        DO_IMPORT(transposon),
        DO_IMPORT(UniProtKB_evidence),
        DO_IMPORT(usedin)
#undef DO_IMPORT
    };
    typedef const CStaticPairArrayMap<const char*, EFeatureQualifier, PNocase_CStr> TLegalImportMap;
    DEFINE_STATIC_ARRAY_MAP(TLegalImportMap, kLegalImportMap, kLegalImports);

    bool check_qual_syntax = ctx.Config().CheckQualSyntax();

    // Captured before the loop: if /old_locus_tag was already present when
    // we got here, the gbqual copies are not imported again.
    const bool old_locus_tag_added_elsewhere = x_HasQual(eFQ_old_locus_tag);

    bool first_pseudogene = true;

    // /replace values are buffered here so they can be added in sorted order.
    vector<string> replace_quals;
    const CSeq_feat_Base::TQual & qual = m_Feat.GetQual(); // must store reference since ITERATE macro evaluates 3rd arg multiple times
    ITERATE( CSeq_feat::TQual, it, qual ) {
        if (!(*it)->IsSetQual()  ||  !(*it)->IsSetVal()) {
            continue;
        }
        const string& val = (*it)->GetVal();

        const char* name = (*it)->GetQual().c_str();
        const TLegalImportMap::const_iterator li = kLegalImportMap.find(name);
        EFeatureQualifier   slot = eFQ_illegal_qual;
        if ( li != kLegalImportMap.end() ) {
            slot = li->second;
        } else if (check_qual_syntax) {
            // unknown qualifier name and strict syntax checking: drop it
            continue;
        }

        // only certain slot types may have an empty value (e.g. M96433)
        switch(slot) {
        case eFQ_replace:
        case eFQ_pseudogene:
            // empty value allowed for these slot types, so we don't check
            break;
        default:
            // empty value forbidden for other slot types
            if( val.empty() ) {
                continue;
            }
            break;
        }

        // Note: the IsSetVal() tests inside the cases below are always true
        // here, because entries without a value were skipped above.
        switch (slot) {
        case eFQ_allele:
            // if /allele inherited from gene, suppress allele gbqual on feature
            if (x_HasQual(eFQ_gene_allele)) {
                continue;
            } else {
                x_AddQual(slot, new CFlatStringQVal(val,
                    CFormatQual::eTrim_WhitespaceOnly));
            }
            break;
        case eFQ_codon:
            if ((*it)->IsSetVal()  &&  !NStr::IsBlank(val)) {
                x_AddQual(slot, new CFlatStringQVal(val, CFormatQual::eUnquoted));
            }
            break;
        case eFQ_cons_splice:
            if ((*it)->IsSetVal()) {
                if (!check_qual_syntax  ||  s_IsValidnConsSplice(val)) {
                    x_AddQual(slot, new CFlatStringQVal(val));
                }
            }
            break;
        case eFQ_direction:
            if ((*it)->IsSetVal()) {
                if (!check_qual_syntax  ||  s_IsValidDirection(val)) {
                    // NOTE(review): stored as CFlatNumberQVal although the
                    // value is a word -- presumably for its output format;
                    // confirm.
                    x_AddQual(slot, new CFlatNumberQVal(val));
                }
            }
            break;
        case eFQ_estimated_length:
        case eFQ_mod_base:
        case eFQ_number:
            if ((*it)->IsSetVal()  &&  !NStr::IsBlank(val)) {
                x_AddQual(slot, new CFlatNumberQVal(val));
            }
            break;
        case eFQ_rpt_type:
            x_AddRptTypeQual(val, check_qual_syntax);
            break;
        case eFQ_rpt_unit:
            if ((*it)->IsSetVal()) {
                x_AddRptUnitQual(val);
            }
            break;
        case eFQ_usedin:
        {{
            // one qualifier per entry of a possibly parenthesized list
            list<string> vals;
            s_ParseParentQual(**it, vals);
            ITERATE (list<string>, i, vals) {
                x_AddQual(slot, new CFlatStringQVal(*i, CFormatQual::eQuoted));
            }
            break;
        }}
        case eFQ_old_locus_tag:
        {{
            if( ! old_locus_tag_added_elsewhere ) {
                list<string> vals;
                s_ParseParentQual(**it, vals);
                ITERATE (list<string>, i, vals) {
                    x_AddQual(slot, new CFlatStringQVal(*i, CFormatQual::eQuoted, CFormatQual::eTrim_WhitespaceOnly));
                }
            }
            break;
        }}
        case eFQ_rpt_family:
            if ((*it)->IsSetVal()  &&  !NStr::IsBlank(val)) {
                x_AddQual(slot, new CFlatStringQVal(val));
            }
            break;
        case eFQ_label:
            x_AddQual(slot, new CFlatLabelQVal(val));
            break;
        case eFQ_EC_number:
            // with DropIllegalQuals(), only EC numbers accepted by
            // s_IsLegalECNumber() are kept
            if ((*it)->IsSetVal()  &&
                ( ! ctx.Config().DropIllegalQuals() || s_IsLegalECNumber(val) ) ) {
                x_AddQual(slot, new CFlatStringQVal(val));
            }
            break;
        case eFQ_illegal_qual:
            // a transl_except gbqual is not imported when the sequence
            // entry index is in use
            if ( ctx.UsingSeqEntryIndex() && NStr::CompareNocase (name, "transl_except") == 0 ) {
              break;
            }
            x_AddQual(slot, new CFlatIllegalQVal(**it));
            break;
        case eFQ_product:
            if (!x_HasQual(eFQ_product)) {
                x_AddQual(slot, new CFlatStringQVal(val));
            } else {
                // A product is already present: keep this one as an extra
                // product only if it differs from both the gene and the
                // existing product values.
                const CFlatStringQVal* gene = x_GetStringQual(eFQ_gene);
                const string& gene_val =
                    gene != NULL ? gene->GetValue() : kEmptyStr;
                const CFlatStringQVal* product = x_GetStringQual(eFQ_product);
                const string& product_val =
                    product != NULL ? product->GetValue() : kEmptyStr;
                if (val != gene_val  &&  val != product_val) {

                    // ...and it is not dropped by this combination:
                    // CodonRecognizedToNote() set, trna_codons present and
                    // the value containing "RNA".
                    if ( ! ctx.Config().CodonRecognizedToNote() ||
                         ! x_HasQual(eFQ_trna_codons) ||
                         NStr::Find(val, "RNA") == NPOS )
                    {
                        x_AddQual(eFQ_xtra_prod_quals, new CFlatStringQVal(val));
                    }
                }
            }
            break;
        case eFQ_compare:
            {{
                list<string> vals;
                s_ParseParentQual(**it, vals);
                ITERATE (list<string>, i, vals) {
                    // under syntax checking each entry must pass
                    // IsValidAccession(..., eValidateAccDotVer)
                    if (!ctx.Config().CheckQualSyntax()  ||
                        IsValidAccession(*i, eValidateAccDotVer)) {
                        x_AddQual(slot, new CFlatStringQVal(*i, CFormatQual::eUnquoted));
                    }
                }
            }}
            break;
        case eFQ_evidence:
            {{
                // /evidence is not stored under its own slot: the two
                // recognized values become a default-constructed
                // experiment / inference qualifier, anything else is dropped.
                if ( val == "EXPERIMENTAL" ) {
                    x_AddQual(eFQ_experiment, new CFlatExperimentQVal());
                } else if ( val == "NOT_EXPERIMENTAL" ) {
                    x_AddQual(eFQ_inference, new CFlatInferenceQVal());
                }
            }}
            break;

        case eFQ_rpt_unit_range:
            x_AddQual(slot, new CFlatStringQVal(val, CFormatQual::eUnquoted));
            break;

        case eFQ_replace:
            {{
                 // values made up solely of ACGTU letters (either case) are
                 // lower-cased with 'u' rewritten to 't'
                 string s(val);
                 if (string::npos == s.find_first_not_of("ACGTUacgtu")) {
                      NStr::ToLower(s);
                      NStr::ReplaceInPlace(s, "u", "t");
                 }
                 replace_quals.push_back(s);
             }}
            break;

        case eFQ_operon:
            {{
                // only the first /operon is kept
                if( ! x_HasQual(eFQ_operon) ) {
                    x_AddQual(slot, new CFlatStringQVal(val));
                }
            }}
            break;

        case eFQ_experiment:
            {{
                // in HTML mode PMIDs are turned into links, unless
                // CommentHasSuspiciousHtml() flags the value
                if( ctx.Config().DoHTML() && ! CommentHasSuspiciousHtml(val) ) {
                    string new_val;
                    s_HTMLizeExperimentQual(new_val, val);
                    x_AddQual(slot, new CFlatStringQVal(new_val));
                } else {
                    x_AddQual(slot, new CFlatStringQVal(val));
                }
            }}
            break;

        case eFQ_clone:
            x_AddQual(slot, new CFlatStringQVal(val, CFormatQual::eTrim_WhitespaceOnly));
            break;

        case eFQ_pseudogene:

            // our pseudogene(s) override(s) any that existed before
            if( first_pseudogene ) {
                first_pseudogene = false;
                x_RemoveQuals(eFQ_pseudogene);
            }
            x_AddQual(slot, new CFlatStringQVal(val));

            break;

        case eFQ_regulatory_class:
            x_AddRegulatoryClassQual(val, check_qual_syntax);
            break;

        case eFQ_recombination_class:
            x_AddRecombinationClassQual(val, check_qual_syntax);
            break;

        default:
            // every other recognized slot: plain string value
            x_AddQual(slot, new CFlatStringQVal(val));
            break;
        }
    }

    // add the buffered /replace values in sorted order
    if (replace_quals.size()) {
        std::sort(replace_quals.begin(), replace_quals.end());
        ITERATE (vector<string>, it, replace_quals) {
            x_AddQual(eFQ_replace, new CFlatStringQVal(*it));
        }
    }

    // some "map-related" qual adjustments
    // (first available of cyt_map, gen_map, rad_map becomes the /map value;
    // all three specific map quals are then removed)
    if( ctx.Config().HideSpecificGeneMaps() && ! x_HasQual(eFQ_map) ) {
        if( x_HasQual(eFQ_cyt_map) ) {
            x_AddQual(eFQ_map, x_GetQual(eFQ_cyt_map)->second );
        } else if( x_HasQual(eFQ_gen_map) ) {
            x_AddQual(eFQ_map, x_GetQual(eFQ_gen_map)->second );
        } else if( x_HasQual(eFQ_rad_map) ) {
            x_AddQual(eFQ_map, x_GetQual(eFQ_rad_map)->second );
        }
        x_RemoveQuals(eFQ_cyt_map);
        x_RemoveQuals(eFQ_gen_map);
        x_RemoveQuals(eFQ_rad_map);
    }
}
4427 
4428 //  ----------------------------------------------------------------------------
x_AddRptUnitQual(const string & rpt_unit)4429 void CFeatureItem::x_AddRptUnitQual(
4430     const string& rpt_unit )
4431 //  ----------------------------------------------------------------------------
4432 {
4433     if (rpt_unit.empty()) {
4434         return;
4435     }
4436 
4437     vector<string> units;
4438 
4439     if (NStr::StartsWith(rpt_unit, '(')  &&  NStr::EndsWith(rpt_unit, ')')  &&
4440         NStr::Find(rpt_unit, "(", 1) == NPOS) {
4441         string tmp = rpt_unit.substr(1, rpt_unit.length() - 2);
4442         NStr::Split(tmp, ",", units, 0);
4443     } else {
4444         units.push_back(rpt_unit);
4445     }
4446 
4447     NON_CONST_ITERATE (vector<string>, it, units) {
4448         if (!it->empty()) {
4449             NStr::TruncateSpacesInPlace(*it);
4450             x_AddQual(eFQ_rpt_unit, new CFlatStringQVal(*it));
4451         }
4452     }
4453 }
4454 
4455 
4456 //  ----------------------------------------------------------------------------
x_AddRptTypeQual(const string & rpt_type,bool check_qual_syntax)4457 void CFeatureItem::x_AddRptTypeQual(
4458     const string& rpt_type,
4459     bool check_qual_syntax )
4460 //  ----------------------------------------------------------------------------
4461 {
4462     if (rpt_type.empty()) {
4463         return;
4464     }
4465 
4466     string value( rpt_type );
4467     NStr::TruncateSpacesInPlace( value );
4468 
4469     vector<string> pieces;
4470     s_SplitCommaSeparatedStringInParens( pieces, value );
4471 
4472     ITERATE( vector<string>, it, pieces ) {
4473         if ( ! check_qual_syntax || CGb_qual::IsValidRptTypeValue( *it ) ) {
4474             x_AddQual( eFQ_rpt_type, new CFlatStringQVal( *it, CFormatQual::eUnquoted ) );
4475         }
4476     }
4477 }
4478 
4479 
s_IsValidRegulatoryClass(const string & type)4480 static bool s_IsValidRegulatoryClass(const string& type)
4481 {
4482     vector<string> valid_types = CSeqFeatData::GetRegulatoryClassList();
4483 
4484     FOR_EACH_STRING_IN_VECTOR (itr, valid_types) {
4485         string str = *itr;
4486         if (NStr::Equal (str, type)) return true;
4487     }
4488 
4489     return false;
4490 }
4491 
s_IsValidRecombinationClass(const string & type)4492 static bool s_IsValidRecombinationClass(const string& type)
4493 {
4494     vector<string> valid_types = CSeqFeatData::GetRecombinationClassList();
4495 
4496     FOR_EACH_STRING_IN_VECTOR (itr, valid_types) {
4497         string str = *itr;
4498         if (NStr::Equal (str, type)) return true;
4499     }
4500 
4501     return false;
4502 }
4503 
4504 //  ----------------------------------------------------------------------------
x_AddRecombinationClassQual(const string & recombination_class,bool check_qual_syntax)4505 void CFeatureItem::x_AddRecombinationClassQual(
4506     const string& recombination_class,
4507     bool check_qual_syntax
4508 )
4509 //  ----------------------------------------------------------------------------
4510 {
4511     if (recombination_class.empty()) {
4512         return;
4513     }
4514 
4515      string recomb_class = recombination_class;
4516 
4517     if (NStr::StartsWith(recomb_class, "other:")) {
4518         NStr::TrimPrefixInPlace(recomb_class, "other:");
4519         NStr::TruncateSpacesInPlace(recomb_class);
4520     }
4521     if ( s_IsValidRecombinationClass( recomb_class ) ) {
4522         x_AddQual( eFQ_recombination_class, new CFlatStringQVal(recomb_class));
4523     } else {
4524         x_AddQual( eFQ_recombination_class, new CFlatStringQVal("other"));
4525         x_AddQual( eFQ_seqfeat_note, new CFlatStringQVal(recomb_class));
4526     }
4527 }
4528 
4529 
4530 //  ----------------------------------------------------------------------------
x_AddRegulatoryClassQual(const string & regulatory_class,bool check_qual_syntax)4531 void CFeatureItem::x_AddRegulatoryClassQual(
4532     const string& regulatory_class,
4533     bool check_qual_syntax
4534 )
4535 //  ----------------------------------------------------------------------------
4536 {
4537     if (regulatory_class.empty()) {
4538         return;
4539     }
4540 
4541      string reg_class = regulatory_class;
4542 
4543     if (NStr::StartsWith(reg_class, "other:")) {
4544         NStr::TrimPrefixInPlace(reg_class, "other:");
4545         NStr::TruncateSpacesInPlace(reg_class);
4546     }
4547     if ( s_IsValidRegulatoryClass( reg_class ) ) {
4548         x_AddQual( eFQ_regulatory_class, new CFlatStringQVal(reg_class));
4549     } else if (NStr::CompareNocase(reg_class, "other") == 0  &&
4550         m_Feat.IsSetComment()  &&  !m_Feat.GetComment().empty()) {
4551         x_AddQual( eFQ_regulatory_class, new CFlatStringQVal("other"));
4552     } else {
4553         x_AddQual( eFQ_regulatory_class, new CFlatStringQVal("other"));
4554         x_AddQual( eFQ_seqfeat_note, new CFlatStringQVal(reg_class));
4555     }
4556 }
4557 
4558 
// Fills ff.SetQuals() with this feature's qualifiers.
//
// In FTable format the pre-built m_FTableQuals are copied and nothing else
// is done.  Otherwise each qualifier slot is passed to x_FormatQual() in the
// fixed sequence written out below, with the note qualifiers inserted by
// x_FormatNoteQuals() at their place in that sequence.
//
// DO_QUAL(x) expands to x_FormatQual(eFQ_x, "x", qvec).  Slots whose
// printed name differs from the slot name call x_FormatQual() directly.
//
// @param ff  flat feature whose qualifier vector is filled
void CFeatureItem::x_FormatQuals(CFlatFeature& ff) const
{
    const CFlatFileConfig& cfg = GetContext()->Config();

    if ( cfg.IsFormatFTable() ) {
        ff.SetQuals() = m_FTableQuals;
        return;
    }

    ff.SetQuals().reserve(m_Quals.Size());
    CFlatFeature::TQuals& qvec = ff.SetQuals();

#define DO_QUAL(x) x_FormatQual(eFQ_##x, #x, qvec)
    DO_QUAL(ncRNA_class);
    DO_QUAL(regulatory_class);
    DO_QUAL(recombination_class);

    DO_QUAL(partial);
    DO_QUAL(gene);

    DO_QUAL(locus_tag);
    DO_QUAL(old_locus_tag);

    x_FormatQual(eFQ_gene_syn_refseq, "synonym", qvec);
    DO_QUAL(gene_syn);

    x_FormatQual(eFQ_gene_allele, "allele", qvec);

    DO_QUAL(operon);

    DO_QUAL(product);

    x_FormatQual(eFQ_prot_EC_number, "EC_number", qvec);
    x_FormatQual(eFQ_prot_activity,  "function", qvec);

    DO_QUAL(standard_name);
    DO_QUAL(coded_by);
    DO_QUAL(derived_from);

    x_FormatQual(eFQ_prot_name, "name", qvec);
    DO_QUAL(region_name);
    DO_QUAL(bond_type);
    DO_QUAL(site_type);
    DO_QUAL(sec_str_type);
    DO_QUAL(heterogen);
    DO_QUAL(non_std_residue);

    DO_QUAL(tag_peptide);

    DO_QUAL(evidence);
    DO_QUAL(experiment);
    DO_QUAL(inference);
    DO_QUAL(exception);
    DO_QUAL(ribosomal_slippage);
    DO_QUAL(trans_splicing);
    DO_QUAL(circular_RNA);
    DO_QUAL(artificial_location);

    // GO qualifiers are emitted here only when they are not configured to
    // go into the note (GoQualsToNote()).
    if ( !cfg.GoQualsToNote() ) {
        if( cfg.GoQualsEachMerge() ) {
            // combine all quals of a given type onto the same qual
            x_FormatGOQualCombined(eFQ_go_component, "GO_component", qvec);
            x_FormatGOQualCombined(eFQ_go_function, "GO_function", qvec);
            x_FormatGOQualCombined(eFQ_go_process, "GO_process", qvec);
        } else {
            x_FormatQual(eFQ_go_component, "GO_component", qvec);
            x_FormatQual(eFQ_go_function, "GO_function", qvec);
            x_FormatQual(eFQ_go_process, "GO_process", qvec);
        }
    }

    DO_QUAL(nomenclature);

    // note qualifiers are formatted at this point of the sequence
    x_FormatNoteQuals(ff);
    DO_QUAL(citation);

    DO_QUAL(number);

    DO_QUAL(pseudo);
    DO_QUAL(pseudogene);
    DO_QUAL(selenocysteine);
    DO_QUAL(pyrrolysine);

    DO_QUAL(codon_start);

    DO_QUAL(anticodon);
    // with CodonRecognizedToNote(), trna_codons is emitted by
    // x_FormatNoteQuals() instead
    if ( ! cfg.CodonRecognizedToNote() ) {
        DO_QUAL(trna_codons);
    }
    DO_QUAL(bound_moiety);
    DO_QUAL(clone);
    DO_QUAL(compare);
    // DO_QUAL(cons_splice);
    DO_QUAL(direction);
    DO_QUAL(function);
    DO_QUAL(frequency);
    DO_QUAL(EC_number);
    x_FormatQual(eFQ_gene_map, "map", qvec);
    // When Config().HideSpecificGeneMaps() is set and no /map qual exists,
    // x_ImportQuals copies one of cyt_map, gen_map, rad_map to eFQ_map and
    // removes all three:
    DO_QUAL(cyt_map);
    DO_QUAL(gen_map);
    DO_QUAL(rad_map);
    DO_QUAL(estimated_length);
    DO_QUAL(gap_type);
    DO_QUAL(linkage_evidence);
    DO_QUAL(allele);
    DO_QUAL(map);
    DO_QUAL(mod_base);
    DO_QUAL(PCR_conditions);
    DO_QUAL(phenotype);
    DO_QUAL(rpt_family);
    DO_QUAL(rpt_type);
    DO_QUAL(rpt_unit);
    DO_QUAL(rpt_unit_range);
    DO_QUAL(rpt_unit_seq);
    DO_QUAL(satellite);
    DO_QUAL(mobile_element);
    DO_QUAL(mobile_element_type);
    DO_QUAL(usedin);

    // extra imports, actually...
    x_FormatQual(eFQ_illegal_qual, "illegal", qvec);

    DO_QUAL(replace);

    DO_QUAL(transl_except);
    DO_QUAL(transl_table);
    DO_QUAL(codon);
    DO_QUAL(organism);
    DO_QUAL(label);
    x_FormatQual(eFQ_cds_product, "product", qvec);
    DO_QUAL(UniProtKB_evidence);
    DO_QUAL(protein_id);
    DO_QUAL(transcript_id);
    DO_QUAL(db_xref);
    x_FormatQual(eFQ_gene_xref, "db_xref", qvec);
    DO_QUAL(mol_wt);
    DO_QUAL(calculated_mol_wt);
    DO_QUAL(translation);
    DO_QUAL(transcription);
    DO_QUAL(peptide);

#undef DO_QUAL
}
4704 
4705 /*
4706 // check if str2 is a sub string of str1
4707 static bool s_IsRedundant(const string& str1, const string& str2)
4708 {
4709     size_t pos = NPOS;
4710     bool whole = false;
4711     for (pos = NStr::Find(str1, str2); pos != NPOS  &&  !whole; pos += str2.length()) {
4712         whole = IsWholeWord(str1, pos);
4713     }
4714     return (pos != NPOS  && whole);
4715 }
4716 
4717 
4718 // Remove redundant elements that occur twice or as part of other elements.
4719 static void s_PruneNoteQuals(CFlatFeature::TQuals& qvec)
4720 {
4721     if (qvec.empty()) {
4722         return;
4723     }
4724     CFlatFeature::TQuals::iterator it1 = qvec.begin();
4725     while (it1 != qvec.end()) {
4726         CFlatFeature::TQuals::iterator it2 = it1 + 1;
4727         const string& val1 = (*it1)->GetValue();
4728         while (it2 != qvec.end()) {
4729             const string& val2 = (*it2)->GetValue();
4730             if (s_IsRedundant(val1, val2)) {
4731                 it2 = qvec.erase(it2);
4732             } else if (s_IsRedundant(val2, val1)) {
4733                 break;
4734             } else {
4735                 ++it2;
4736             }
4737         }
4738         if (it2 != qvec.end()) {
4739             it1 = qvec.erase(it1);
4740         } else {
4741             ++it1;
4742         }
4743     }
4744 }
4745 */
4746 
x_FormatNoteQuals(CFlatFeature & ff) const4747 void CFeatureItem::x_FormatNoteQuals(CFlatFeature& ff) const
4748 {
4749     const CFlatFileConfig& cfg = GetContext()->Config();
4750     CFlatFeature::TQuals qvec;
4751 
4752 #define DO_NOTE(x) x_FormatNoteQual(eFQ_##x, GetStringOfFeatQual(eFQ_##x), qvec)
4753 #define DO_NOTE_PREPEND_NEWLINE(x) x_FormatNoteQual(eFQ_##x, GetStringOfFeatQual(eFQ_##x), qvec, IFlatQVal::fPrependNewline )
4754     DO_NOTE(transcript_id_note);
4755     DO_NOTE(gene_desc);
4756 
4757     if ( cfg.CodonRecognizedToNote() ) {
4758         DO_NOTE(trna_codons);
4759     }
4760     DO_NOTE(encodes);
4761     DO_NOTE(prot_desc);
4762     DO_NOTE(prot_note);
4763     DO_NOTE(prot_comment);
4764     DO_NOTE(prot_method);
4765     DO_NOTE(maploc);
4766     DO_NOTE(prot_conflict);
4767     DO_NOTE(prot_missing);
4768     DO_NOTE(seqfeat_note);
4769     DO_NOTE(region);
4770 //    DO_NOTE(selenocysteine_note);
4771     DO_NOTE(prot_names);
4772     DO_NOTE(bond);
4773     DO_NOTE(site);
4774 //    DO_NOTE(rrna_its);
4775     DO_NOTE(xtra_prod_quals);
4776 //     DO_NOTE(inference_bad);
4777     DO_NOTE(modelev);
4778 //     DO_NOTE(cdd_definition);
4779 //    DO_NOTE(tag_peptide);
4780     DO_NOTE_PREPEND_NEWLINE(exception_note);
4781 
4782     string notestr;
4783     string suffix;
4784 //    bool add_period = false;
4785     bool add_period = true/*fl*/;
4786 
4787     s_QualVectorToNote(qvec, true, notestr, suffix, add_period);
4788 
4789     if (GetContext()->Config().GoQualsToNote()) {
4790         qvec.clear();
4791         DO_NOTE(go_component);
4792         DO_NOTE(go_function);
4793         DO_NOTE(go_process);
4794         s_QualVectorToNote(qvec, false, notestr, suffix, add_period);
4795     }
4796     s_NoteFinalize(add_period, notestr, ff, eTilde_tilde);
4797 
4798 #undef DO_NOTE
4799 #undef DO_NOTE_PREPEND_NEWLINE
4800 }
4801 
x_FormatQual(EFeatureQualifier slot,const char * name,CFlatFeature::TQuals & qvec,IFlatQVal::TFlags flags) const4802 void CFeatureItem::x_FormatQual
4803 (EFeatureQualifier slot,
4804  const char* name,
4805  CFlatFeature::TQuals& qvec,
4806  IFlatQVal::TFlags flags) const
4807 {
4808     TQCI it = m_Quals.LowerBound(slot);
4809     TQCI end = m_Quals.end();
4810     while (it != end  &&  it->first == slot) {
4811         it->second->Format(qvec, name, *GetContext(), flags);
4812         ++it;
4813     }
4814 }
4815 
4816 
x_FormatNoteQual(EFeatureQualifier slot,const CTempString & name,CFlatFeature::TQuals & qvec,IFlatQVal::TFlags flags) const4817 void CFeatureItem::x_FormatNoteQual
4818 (EFeatureQualifier slot,
4819  const CTempString & name,
4820  CFlatFeature::TQuals& qvec,
4821  IFlatQVal::TFlags flags) const
4822 {
4823     flags |= IFlatQVal::fIsNote;
4824 
4825     TQCI it = m_Quals.LowerBound(slot);
4826     TQCI end = m_Quals.end();
4827     while (it != end  &&  it->first == slot) {
4828         it->second->Format(qvec, name, *GetContext(), flags);
4829         ++it;
4830     }
4831 }
4832 
4833 // This produces one qual out of all the GO quals of the given slot, with their
4834 // values concatenated.
x_FormatGOQualCombined(EFeatureQualifier slot,const CTempString & name,CFlatFeature::TQuals & qvec,TQualFlags flags) const4835 void CFeatureItem::x_FormatGOQualCombined
4836 (EFeatureQualifier slot,
4837  const CTempString & name,
4838  CFlatFeature::TQuals& qvec,
4839  TQualFlags flags) const
4840 {
4841     // copy all the given quals with that name since we need to sort them
4842     vector<CConstRef<CFlatGoQVal> > goQuals;
4843 
4844     TQCI it = m_Quals.LowerBound(slot);
4845     TQCI end = m_Quals.end();
4846     while (it != end  &&  it->first == slot) {
4847         goQuals.push_back( CConstRef<CFlatGoQVal>( dynamic_cast<const CFlatGoQVal*>( it->second.GetNonNullPointer() ) ) );
4848         ++it;
4849     }
4850 
4851     if( goQuals.empty() ) {
4852         return;
4853     }
4854 
4855     stable_sort( goQuals.begin(), goQuals.end(), CGoQualLessThan() );
4856 
4857     CFlatFeature::TQuals temp_qvec;
4858 
4859     string combined;
4860 
4861 
4862     string::size_type this_part_beginning_text_string_pos = 0;
4863 
4864     // now concatenate their values into the variable "combined"
4865     const string *pLastQualTextString = NULL;
4866     ITERATE( vector<CConstRef<CFlatGoQVal> >, iter, goQuals ) {
4867 
4868         // Use thisQualTextString to tell when we have consecutive quals with the
4869         // same text string.
4870         const string *pThisQualTextString = &(*iter)->GetTextString();
4871         if( NULL == pThisQualTextString ) {
4872             continue;
4873         }
4874 
4875         (*iter)->Format(temp_qvec, name, *GetContext(), flags);
4876 
4877         if( pLastQualTextString == NULL || ! NStr::EqualNocase( *pLastQualTextString, *pThisQualTextString ) ) {
4878             // normal case: each CFlatGoQVal has its own part
4879             if( ! combined.empty() ) {
4880                 combined += "; ";
4881                 this_part_beginning_text_string_pos = combined.length() - 1;
4882             }
4883             combined += temp_qvec.back()->GetValue();
4884         } else {
4885             // consecutive CFlatGoQVal with the same text string: merge
4886             // (chop off the part up to and including the text string )
4887             const string & new_value = temp_qvec.back()->GetValue();
4888 
4889             // let text_string_pos point to the part *after* the text string
4890             SIZE_TYPE post_text_string_pos = NStr::FindNoCase( new_value, *pLastQualTextString );
4891             _ASSERT( post_text_string_pos != NPOS );
4892             post_text_string_pos += pLastQualTextString->length();
4893 
4894             // append the new part after the text string, but only
4895             // if it's not a duplicate
4896             string str_to_append = new_value.substr( post_text_string_pos,
4897                 (pLastQualTextString->length() - post_text_string_pos) );
4898             if( NStr::Find(combined, str_to_append, this_part_beginning_text_string_pos) == NPOS ) {
4899                 combined.append( str_to_append );
4900             }
4901         }
4902 
4903         pLastQualTextString = pThisQualTextString;
4904     }
4905     pLastQualTextString = NULL; // just to make sure we don't accidentally use it
4906 
4907     // add the final merged CFormatQual
4908     if( ! combined.empty() ) {
4909         const string prefix = " ";
4910         const string suffix = ";";
4911         TFlatQual res(new CFormatQual(name, combined, prefix, suffix, CFormatQual::eQuoted ));
4912         qvec.push_back(res);
4913     }
4914 }
4915 
x_GetStringQual(EFeatureQualifier slot) const4916 const CFlatStringQVal* CFeatureItem::x_GetStringQual(EFeatureQualifier slot) const
4917 {
4918     const IFlatQVal* qual = 0;
4919     if ( x_HasQual(slot) ) {
4920         qual = m_Quals.Find(slot)->second;
4921     }
4922     return dynamic_cast<const CFlatStringQVal*>(qual);
4923 }
4924 
4925 
x_GetStringListQual(EFeatureQualifier slot) const4926 CFlatStringListQVal* CFeatureItem::x_GetStringListQual(EFeatureQualifier slot) const
4927 {
4928     IFlatQVal* qual = 0;
4929     if (x_HasQual(slot)) {
4930         qual = const_cast<IFlatQVal*>(&*m_Quals.Find(slot)->second);
4931     }
4932     return dynamic_cast<CFlatStringListQVal*>(qual);
4933 }
4934 
x_GetFlatProductNamesQual(EFeatureQualifier slot) const4935 CFlatProductNamesQVal * CFeatureItem::x_GetFlatProductNamesQual(EFeatureQualifier slot) const
4936 {
4937     IFlatQVal* qual = 0;
4938     if (x_HasQual(slot)) {
4939         qual = const_cast<IFlatQVal*>(&*m_Quals.Find(slot)->second);
4940     }
4941     return dynamic_cast<CFlatProductNamesQVal*>(qual);
4942 }
4943 
// maps each valid mobile_element_type prefix to whether it
// must have more info after the prefix
// (of the prefixes listed here, only "other" requires it).
// NOTE(review): the keys are listed in case-sensitive string order (upper
// case before lower case); the static array map presumably relies on its
// input being sorted that way -- confirm before adding or reordering entries.
typedef SStaticPair<const char *, bool> TMobileElemTypeKey;
static const TMobileElemTypeKey mobile_element_key_to_suffix_required [] = {
    {  "LINE",                     false  },
    {  "MITE",                     false  },
    {  "SINE",                     false  },
    {  "insertion sequence",       false  },
    {  "integron",                 false  },
    {  "non-LTR retrotransposon",  false  },
    {  "other",                    true   },
    {  "retrotransposon",          false  },
    {  "transposon",               false  }
};

// lookup map over the table above, keyed by prefix (PCase_CStr comparison)
typedef CStaticPairArrayMap <const char*, bool, PCase_CStr> TMobileElemTypeMap;
DEFINE_STATIC_ARRAY_MAP(TMobileElemTypeMap, sm_MobileElemTypeKeys, mobile_element_key_to_suffix_required);
4961 
4962 // returns whether or not it's valid
s_ValidateMobileElementType(const string & mobile_element_type_value)4963 bool s_ValidateMobileElementType( const string & mobile_element_type_value )
4964 {
4965     if( mobile_element_type_value.empty() ) {
4966         return false;
4967     }
4968 
4969     // if there's a colon, we ignore the part after the colon for testing purposes
4970     string::size_type colon_pos = mobile_element_type_value.find( ':' );
4971 
4972     const string value_before_colon = ( string::npos == colon_pos
4973         ? mobile_element_type_value
4974         : mobile_element_type_value.substr( 0, colon_pos ) );
4975 
4976     TMobileElemTypeMap::const_iterator prefix_info =
4977         sm_MobileElemTypeKeys.find( value_before_colon.c_str() );
4978     if( prefix_info == sm_MobileElemTypeKeys.end() ) {
4979         return false; // prefix not found
4980     }
4981 
4982     // check if info required after prefix (colon plus info, actually)
4983     if( prefix_info->second ) {
4984         if( string::npos == colon_pos ) {
4985             return false; // no additional info supplied, even though required
4986         }
4987     }
4988 
4989     // all tests passed
4990     return true;
4991 }
4992 
4993 class CInStringPred
4994 {
4995 public:
CInStringPred(const string & comparisonString)4996     explicit CInStringPred( const string &comparisonString )
4997         : m_ComparisonString( comparisonString )
4998     { }
4999 
operator ()(const string & arg)5000     bool operator()( const string &arg ) {
5001         return NStr::Find( m_ComparisonString, arg ) != NPOS;
5002     }
5003 private:
5004     const string &m_ComparisonString;
5005 };
5006 
x_CleanQuals(const CGene_ref * gene_ref)5007 void CFeatureItem::x_CleanQuals(
5008     const CGene_ref* gene_ref )
5009 {
5010     const TGeneSyn* gene_syn =
5011         (gene_ref && gene_ref->IsSetSyn() && !gene_ref->GetSyn().empty() )
5012         ?
5013         &gene_ref->GetSyn()
5014         :
5015         0;
5016     const CBioseqContext& ctx = *GetContext();
5017 
5018     if (ctx.Config().DropIllegalQuals()) {
5019         x_DropIllegalQuals();
5020     }
5021 
5022     CFlatProductNamesQVal * prot_names = x_GetFlatProductNamesQual(eFQ_prot_names);
5023     const CFlatStringQVal* gene = x_GetStringQual(eFQ_gene);
5024     const CFlatStringQVal* prot_desc = x_GetStringQual(eFQ_prot_desc);
5025     const CFlatStringQVal* standard_name = x_GetStringQual(eFQ_standard_name);
5026     const CFlatStringQVal* seqfeat_note = x_GetStringQual(eFQ_seqfeat_note);
5027 
5028     if (gene != NULL) {
5029         const string& gene_name = gene->GetValue();
5030 
5031         // /gene same as feature.comment will suppress /note
5032         if (m_Feat.IsSetComment()) {
5033             if (NStr::Equal(gene_name, m_Feat.GetComment())) {
5034                 x_RemoveQuals(eFQ_seqfeat_note);
5035                 seqfeat_note = NULL;
5036             }
5037         }
5038 
5039         // remove protein description that equals the gene name, case sensitive
5040         if (prot_desc != NULL) {
5041             if (s_StrEqualDisregardFinalPeriod(gene_name, prot_desc->GetValue(), NStr::eCase)) {
5042                 x_RemoveQuals(eFQ_prot_desc);
5043                 prot_desc = NULL;
5044             }
5045         }
5046 
5047         // remove prot name if equals gene
5048         if (prot_names != NULL) {
5049 
5050             CProt_ref::TName::iterator remove_start = prot_names->SetValue().begin();
5051             ++remove_start; // The "++" is because the first one shouldn't be erased since it's used for the product
5052             CProt_ref::TName::iterator new_end =
5053                 remove( remove_start, prot_names->SetValue().end(), gene_name );
5054             prot_names->SetValue().erase( new_end, prot_names->SetValue().end() );
5055 
5056             if (prot_names->GetValue().empty()) {
5057                 x_RemoveQuals(eFQ_prot_names);
5058                 prot_names = NULL;
5059             }
5060         }
5061     }
5062 
5063     if (prot_desc != NULL) {
5064         const string& pdesc = prot_desc->GetValue();
5065 
5066         // remove prot name if in prot_desc
5067         if (prot_names != NULL) {
5068             CProt_ref::TName::iterator remove_start = prot_names->SetValue().begin();
5069             ++remove_start; // The "++" is because the first one shouldn't be erased since it's used for the product
5070             CProt_ref::TName::iterator new_end =
5071                 remove_if( remove_start, prot_names->SetValue().end(),
5072                     CInStringPred(pdesc) );
5073             prot_names->SetValue().erase( new_end, prot_names->SetValue().end() );
5074 
5075             if (prot_names->GetValue().empty()) {
5076                 x_RemoveQuals(eFQ_prot_names);
5077                 prot_names = NULL;
5078             }
5079         }
5080         // remove protein description that equals the cds product, case sensitive
5081         const CFlatStringQVal* cds_prod = x_GetStringQual(eFQ_cds_product);
5082         if (cds_prod != NULL) {
5083             if (NStr::Equal(pdesc, cds_prod->GetValue())) {
5084                 x_RemoveQuals(eFQ_prot_desc);
5085                 prot_desc = NULL;
5086             }
5087         }
5088 
5089         // remove protein description that equals the standard name
5090         if (prot_desc != NULL  &&  standard_name != NULL) {
5091             // We use s_StrEqualDisregardFinalPeriod rather than plain NStr::EqualNoCase
5092             // because of, e.g., CU638784
5093             if (s_StrEqualDisregardFinalPeriod(pdesc, standard_name->GetValue(), NStr::eNocase )) {
5094                 x_RemoveQuals(eFQ_prot_desc);
5095                 prot_desc = NULL;
5096             }
5097         }
5098 
5099         // remove protein description that equals a gene synonym
5100         // NC_001823 leave in prot_desc if no cds_product
5101         if (prot_desc != NULL  &&  gene_syn != NULL  &&  cds_prod != NULL) {
5102             ITERATE (TGeneSyn, it, *gene_syn) {
5103                 if (!NStr::IsBlank(*it)  &&  pdesc == *it) {
5104                     x_RemoveQuals(eFQ_prot_desc);
5105                     prot_desc = NULL;
5106                     break;
5107                 }
5108             }
5109         }
5110     }
5111 
5112     // check if need to remove seqfeat_note
5113     // (This generally occurs when it's equal to (or, sometimes, contained in) another qual
5114     if (m_Feat.IsSetComment()) {
5115         const string &feat_comment = m_Feat.GetComment();
5116         const CFlatStringQVal* product     = x_GetStringQual(eFQ_product);
5117         const CFlatStringQVal* cds_product = x_GetStringQual(eFQ_cds_product);
5118 
5119         if (product != NULL) {
5120             if (NStr::EqualNocase(product->GetValue(), feat_comment)) {
5121                 x_RemoveQuals(eFQ_seqfeat_note);
5122                 seqfeat_note = NULL;
5123             }
5124         }
5125         if (cds_product != NULL && seqfeat_note != NULL) {
5126             if ( s_StrEqualDisregardFinalPeriod(cds_product->GetValue(), seqfeat_note->GetValue(), NStr::eCase ) ) {
5127                 x_RemoveQuals(eFQ_seqfeat_note);
5128                 seqfeat_note = NULL;
5129             }
5130         }
5131         // suppress selenocysteine note if already in comment
5132 //        if (NStr::Find(feat_comment, "selenocysteine") != NPOS) {
5133 //            x_RemoveQuals(eFQ_selenocysteine_note);
5134 //        }
5135 
5136         // /EC_number same as feat.comment will suppress /note
5137         if( seqfeat_note != NULL ) {
5138             for (TQCI it = x_GetQual(eFQ_EC_number); it != m_Quals.end()  &&  it->first == eFQ_EC_number; ++it) {
5139                 const CFlatStringQVal* ec = dynamic_cast<const CFlatStringQVal*>(it->second.GetPointerOrNull());
5140                 if (ec != NULL) {
5141                     if (NStr::EqualNocase(seqfeat_note->GetValue(), ec->GetValue())) {
5142                         x_RemoveQuals(eFQ_seqfeat_note);
5143                         seqfeat_note = NULL;
5144                         break;
5145                     }
5146                 }
5147             }
5148         }
5149 
5150         // this sort of note provides no additional info (we already know this is a tRNA by other places)
5151         if( feat_comment == "tRNA-" ) {
5152             x_RemoveQuals(eFQ_seqfeat_note);
5153             seqfeat_note = NULL;
5154         }
5155     }
5156 
5157     const CFlatStringQVal* note = x_GetStringQual(eFQ_seqfeat_note);
5158     if (note != NULL  &&  standard_name != NULL) {
5159         if (NStr::Equal(note->GetValue(), standard_name->GetValue())) {
5160             x_RemoveQuals(eFQ_seqfeat_note);
5161             note = NULL;
5162         }
5163     }
5164     if ( ! ctx.IsProt() && note != NULL  &&  gene_syn != NULL) {
5165         ITERATE (TGeneSyn, it, *gene_syn) {
5166             if (NStr::EqualNocase(note->GetValue(), *it)) {
5167                 x_RemoveQuals(eFQ_seqfeat_note);
5168                 note = NULL;
5169                 break;
5170             }
5171         }
5172     }
5173     if( note != NULL && prot_desc != NULL ) { // e.g. L07143, U28372
5174         if( NStr::Find(prot_desc->GetValue(), note->GetValue()) != NPOS ) {
5175             x_RemoveQuals(eFQ_seqfeat_note);
5176             note = NULL;
5177         }
5178     }
5179 
5180     // if there is a prot_desc, then we don't add a period to seqfeat_note
5181     // (Obviously, this part must come after the part that cleans up
5182     // the prot_descs, otherwise we may think we have a prot_desc, when the
5183     // prot_desc is actually to be removed )
5184     if( note != NULL && x_GetStringQual(eFQ_prot_desc ) ) {
5185         const_cast<CFlatStringQVal*>(note)->SetAddPeriod( false );
5186     }
5187 
5188     // hide invalid mobile_element_quals
5189     if( ctx.Config().IsModeRelease() || ctx.Config().IsModeEntrez() ) {
5190 
5191         const CFlatStringQVal *mobile_element_type = x_GetStringQual( eFQ_mobile_element_type );
5192         if( NULL != mobile_element_type && ! s_ValidateMobileElementType(mobile_element_type->GetValue()) ) {
5193             x_RemoveQuals( eFQ_mobile_element_type );
5194         }
5195 
5196     }
5197 
5198     // remove invalid pseudogenes:
5199     {
5200         TQI pseudogene_iter = m_Quals.Find(eFQ_pseudogene);
5201         while( pseudogene_iter != m_Quals.end() &&
5202             pseudogene_iter->first == eFQ_pseudogene )
5203         {
5204             const CFlatStringQVal & qual = dynamic_cast<const CFlatStringQVal &>( *pseudogene_iter->second );
5205             if( s_IsValidPseudoGene(GetContext()->Config().GetMode(), qual.GetValue() ) ) {
5206                 // keep valid pseudogene
5207                 ++pseudogene_iter;
5208             } else {
5209                 // erase invalid pseudogene
5210                 TQI pseudogene_iter_to_erase = pseudogene_iter;
5211                 ++pseudogene_iter;
5212 
5213                 m_Quals.Erase(pseudogene_iter_to_erase);
5214             }
5215         }
5216     }
5217 
5218     // /pseudogene qual suppresses /pseudo qual if /pseudogene fits certain patterns
5219     if( // ( GetContext()->Config().IsModeRelease() || GetContext()->Config().IsModeEntrez() ) &&
5220         x_HasQual(eFQ_pseudo) && x_HasQual(eFQ_pseudogene) )
5221     {
5222         const CFlatStringQVal* qval = x_GetStringQual(eFQ_pseudogene);
5223         // in this part, always use release-mode validation logic, regardless of actual mode
5224         if( qval && s_IsValidPseudoGene( CFlatFileConfig::eMode_Release, qval->GetValue() ) ) {
5225             x_RemoveQuals(eFQ_pseudo);
5226         }
5227     }
5228 }
5229 
5230 
// Table pairing each flat-file qualifier slot (EFeatureQualifier) with a
// CSeqFeatData qualifier.  It backs s_GbToSeqFeatQual(), whose result is
// passed to CSeqFeatData::IsLegalQualifier() when illegal quals are dropped.
// Many slots (e.g. eFQ_bond, eFQ_encodes, eFQ_seqfeat_note) map to
// eQual_note; a few (e.g. eFQ_none, eFQ_illegal_qual, eFQ_peptide) map to
// eQual_bad.
// NOTE(review): the static array map presumably requires the entries to be
// sorted by key (EFeatureQualifier value) -- confirm against the enum's
// declaration order before adding or moving entries.
typedef SStaticPair<EFeatureQualifier, CSeqFeatData::EQualifier> TQualPair;
static const TQualPair sc_GbToFeatQualMap[] = {
    { eFQ_none, CSeqFeatData::eQual_bad },
    { eFQ_allele, CSeqFeatData::eQual_allele },
    { eFQ_anticodon, CSeqFeatData::eQual_anticodon },
    { eFQ_artificial_location, CSeqFeatData::eQual_artificial_location },
    { eFQ_bond, CSeqFeatData::eQual_note },
    { eFQ_bond_type, CSeqFeatData::eQual_bond_type },
    { eFQ_bound_moiety, CSeqFeatData::eQual_bound_moiety },
    { eFQ_calculated_mol_wt, CSeqFeatData::eQual_calculated_mol_wt },
    { eFQ_cds_product, CSeqFeatData::eQual_product },
    { eFQ_circular_RNA, CSeqFeatData::eQual_circular_RNA },
    { eFQ_citation, CSeqFeatData::eQual_citation },
    { eFQ_clone, CSeqFeatData::eQual_clone },
    { eFQ_coded_by, CSeqFeatData::eQual_coded_by },
    { eFQ_codon, CSeqFeatData::eQual_codon },
    { eFQ_codon_start, CSeqFeatData::eQual_codon_start },
    { eFQ_compare, CSeqFeatData::eQual_compare },
    { eFQ_cons_splice, CSeqFeatData::eQual_cons_splice },
    { eFQ_cyt_map, CSeqFeatData::eQual_map },
    { eFQ_db_xref, CSeqFeatData::eQual_db_xref },
    { eFQ_derived_from, CSeqFeatData::eQual_derived_from },
    { eFQ_direction, CSeqFeatData::eQual_direction },
    { eFQ_EC_number, CSeqFeatData::eQual_EC_number },
    { eFQ_encodes, CSeqFeatData::eQual_note },
    { eFQ_estimated_length, CSeqFeatData::eQual_estimated_length },
    { eFQ_experiment, CSeqFeatData::eQual_experiment },
    { eFQ_exception, CSeqFeatData::eQual_exception },
    { eFQ_exception_note, CSeqFeatData::eQual_note },
    { eFQ_figure, CSeqFeatData::eQual_note },
    { eFQ_frequency, CSeqFeatData::eQual_frequency },
    { eFQ_function, CSeqFeatData::eQual_function },
    { eFQ_gap_type, CSeqFeatData::eQual_gap_type },
    { eFQ_gene, CSeqFeatData::eQual_gene },
    { eFQ_gene_desc, CSeqFeatData::eQual_note },
    { eFQ_gene_allele, CSeqFeatData::eQual_allele },
    { eFQ_gene_map, CSeqFeatData::eQual_map },
    { eFQ_gene_syn, CSeqFeatData::eQual_note },
    { eFQ_gene_syn_refseq, CSeqFeatData::eQual_note },
    { eFQ_gene_note, CSeqFeatData::eQual_note },
    { eFQ_gene_xref, CSeqFeatData::eQual_db_xref },
    { eFQ_go_component, CSeqFeatData::eQual_note },
    { eFQ_go_function, CSeqFeatData::eQual_note },
    { eFQ_go_process, CSeqFeatData::eQual_note },
    { eFQ_heterogen, CSeqFeatData::eQual_heterogen },
    { eFQ_illegal_qual, CSeqFeatData::eQual_bad },
    { eFQ_inference, CSeqFeatData::eQual_inference },
    { eFQ_label, CSeqFeatData::eQual_label },
    { eFQ_linkage_evidence, CSeqFeatData::eQual_linkage_evidence },
    { eFQ_locus_tag, CSeqFeatData::eQual_locus_tag },
    { eFQ_map, CSeqFeatData::eQual_map },
    { eFQ_maploc, CSeqFeatData::eQual_note },
    { eFQ_mobile_element, CSeqFeatData::eQual_mobile_element },
    { eFQ_mobile_element_type, CSeqFeatData::eQual_mobile_element_type },
    { eFQ_mod_base, CSeqFeatData::eQual_mod_base },
    { eFQ_modelev, CSeqFeatData::eQual_note },
    { eFQ_mol_wt, CSeqFeatData::eQual_calculated_mol_wt },
    { eFQ_ncRNA_class, CSeqFeatData::eQual_ncRNA_class },
    { eFQ_nomenclature, CSeqFeatData::eQual_nomenclature },
    { eFQ_non_std_residue, CSeqFeatData::eQual_non_std_residue },
    { eFQ_number, CSeqFeatData::eQual_number },
    { eFQ_old_locus_tag, CSeqFeatData::eQual_old_locus_tag },
    { eFQ_operon, CSeqFeatData::eQual_operon },
    { eFQ_organism, CSeqFeatData::eQual_organism },
    { eFQ_partial, CSeqFeatData::eQual_partial },
    { eFQ_PCR_conditions, CSeqFeatData::eQual_PCR_conditions },
    { eFQ_peptide, CSeqFeatData::eQual_bad },
    { eFQ_phenotype, CSeqFeatData::eQual_phenotype },
    { eFQ_product, CSeqFeatData::eQual_product },
    { eFQ_product_quals, CSeqFeatData::eQual_product },
    { eFQ_prot_activity, CSeqFeatData::eQual_function },
    { eFQ_prot_comment, CSeqFeatData::eQual_note },
    { eFQ_prot_EC_number, CSeqFeatData::eQual_EC_number },
    { eFQ_prot_note, CSeqFeatData::eQual_note },
    { eFQ_prot_method, CSeqFeatData::eQual_note },
    { eFQ_prot_conflict, CSeqFeatData::eQual_note },
    { eFQ_prot_desc, CSeqFeatData::eQual_note },
    { eFQ_prot_missing, CSeqFeatData::eQual_note },
    { eFQ_prot_name, CSeqFeatData::eQual_name },
    { eFQ_prot_names, CSeqFeatData::eQual_note },
    { eFQ_protein_id, CSeqFeatData::eQual_protein_id },
    { eFQ_pseudo, CSeqFeatData::eQual_pseudo },
    { eFQ_pseudogene, CSeqFeatData::eQual_pseudogene },
    { eFQ_region, CSeqFeatData::eQual_note },
    { eFQ_region_name, CSeqFeatData::eQual_region_name },
    { eFQ_recombination_class, CSeqFeatData::eQual_recombination_class },
    { eFQ_regulatory_class, CSeqFeatData::eQual_regulatory_class },
    { eFQ_replace, CSeqFeatData::eQual_replace },
    { eFQ_ribosomal_slippage, CSeqFeatData::eQual_ribosomal_slippage },
    { eFQ_rpt_family, CSeqFeatData::eQual_rpt_family },
    { eFQ_rpt_type, CSeqFeatData::eQual_rpt_type },
    { eFQ_rpt_unit, CSeqFeatData::eQual_rpt_unit },
    { eFQ_rpt_unit_range, CSeqFeatData::eQual_rpt_unit_range },
    { eFQ_rpt_unit_seq, CSeqFeatData::eQual_rpt_unit_seq },
    { eFQ_rrna_its, CSeqFeatData::eQual_note },
    { eFQ_satellite, CSeqFeatData::eQual_satellite },
    { eFQ_sec_str_type, CSeqFeatData::eQual_sec_str_type },
//    { eFQ_selenocysteine, CSeqFeatData::eQual_note },
//    { eFQ_selenocysteine_note, CSeqFeatData::eQual_note },
    { eFQ_seqfeat_note, CSeqFeatData::eQual_note },
    { eFQ_site, CSeqFeatData::eQual_note },
    { eFQ_site_type, CSeqFeatData::eQual_site_type },
    { eFQ_standard_name, CSeqFeatData::eQual_standard_name },
    { eFQ_tag_peptide, CSeqFeatData::eQual_tag_peptide },
    { eFQ_trans_splicing, CSeqFeatData::eQual_trans_splicing },
    { eFQ_transcription, CSeqFeatData::eQual_bad },
    { eFQ_transcript_id, CSeqFeatData::eQual_note },
    { eFQ_transcript_id_note, CSeqFeatData::eQual_note },
    { eFQ_transl_except, CSeqFeatData::eQual_transl_except },
    { eFQ_transl_table, CSeqFeatData::eQual_transl_table },
    { eFQ_translation, CSeqFeatData::eQual_translation },
    { eFQ_trna_aa, CSeqFeatData::eQual_bad },
    { eFQ_trna_codons, CSeqFeatData::eQual_note },
    { eFQ_UniProtKB_evidence, CSeqFeatData::eQual_UniProtKB_evidence },
    { eFQ_usedin, CSeqFeatData::eQual_usedin },
    { eFQ_xtra_prod_quals, CSeqFeatData::eQual_note }
};
// lookup map over the table above, keyed by EFeatureQualifier
typedef CStaticPairArrayMap<EFeatureQualifier, CSeqFeatData::EQualifier> TQualMap;
DEFINE_STATIC_ARRAY_MAP(TQualMap, sc_QualMap, sc_GbToFeatQualMap);
5350 
s_GbToSeqFeatQual(EFeatureQualifier qual)5351 static CSeqFeatData::EQualifier s_GbToSeqFeatQual(EFeatureQualifier qual)
5352 {
5353     TQualMap::const_iterator it = sc_QualMap.find(qual);
5354     if ( it != sc_QualMap.end() ) {
5355         return it->second;
5356     }
5357     return CSeqFeatData::eQual_bad;
5358 }
5359 
5360 
x_DropIllegalQuals(void) const5361 void CFeatureItem::x_DropIllegalQuals(void) const
5362 {
5363     const CSeqFeatData& data = m_Feat.GetData();
5364 
5365     TQI it = m_Quals.begin();
5366     while ( it != m_Quals.end() ) {
5367         CSeqFeatData::EQualifier qual = s_GbToSeqFeatQual(it->first);
5368         if ( !data.IsLegalQualifier(qual) ) {
5369             it = m_Quals.Erase(it);
5370         } else {
5371             ++it;
5372         }
5373     }
5374 }
5375 
x_IsSeqFeatDataFeatureLegal(CSeqFeatData::EQualifier qual)5376 bool CFeatureItem::x_IsSeqFeatDataFeatureLegal( CSeqFeatData::EQualifier qual )
5377 {
5378     const CSeqFeatData& data = m_Feat.GetData();
5379     return data.IsLegalQualifier(qual);
5380 }
5381 
5382 //  ----------------------------------------------------------------------------
x_AddFTableQuals(CBioseqContext & ctx)5383 void CFeatureItem::x_AddFTableQuals(
5384     CBioseqContext& ctx )
5385 //  ----------------------------------------------------------------------------
5386 {
5387     bool pseudo = m_Feat.IsSetPseudo()  &&  m_Feat.GetPseudo();
5388 
5389     const CSeqFeatData& data = m_Feat.GetData();
5390 
5391     switch ( m_Feat.GetData().Which() ) {
5392     case CSeqFeatData::e_Gene:
5393         pseudo |= x_AddFTableGeneQuals(data.GetGene());
5394         break;
5395     case CSeqFeatData::e_Rna:
5396         x_AddFTableRnaQuals(m_Feat, ctx);
5397         break;
5398     case CSeqFeatData::e_Cdregion:
5399         x_AddFTableCdregionQuals(m_Feat, ctx);
5400         break;
5401     case CSeqFeatData::e_Prot:
5402         x_AddFTableProtQuals(m_Feat);
5403         break;
5404     case CSeqFeatData::e_Region:
5405         x_AddFTableRegionQuals(data.GetRegion());
5406         break;
5407     case CSeqFeatData::e_Bond:
5408         x_AddFTableBondQuals(data.GetBond());
5409         break;
5410     case CSeqFeatData::e_Site:
5411         x_AddFTableSiteQuals(data.GetSite());
5412         break;
5413     case CSeqFeatData::e_Psec_str:
5414         x_AddFTablePsecStrQuals(data.GetPsec_str());
5415         break;
5416     case CSeqFeatData::e_Non_std_residue:
5417         x_AddFTableNonStdQuals(data.GetNon_std_residue());
5418         break;
5419     case CSeqFeatData::e_Het:
5420         x_AddFTablePsecStrQuals(data.GetHet());
5421         break;
5422     case CSeqFeatData::e_Biosrc:
5423         x_AddFTableBiosrcQuals(data.GetBiosrc());
5424         break;
5425     default:
5426         break;
5427     }
5428     if ( pseudo ) {
5429         x_AddFTableQual("pseudo");
5430     }
5431     const CGene_ref* grp = m_Feat.GetGeneXref();
5432     if ( grp != 0 ) {
5433         string gene_label;
5434         if (grp->IsSuppressed()) {
5435             gene_label = "-";
5436         } else {
5437             grp->GetLabel(&gene_label);
5438         }
5439         x_AddFTableQual("gene", gene_label);
5440     }
5441     if ( m_Feat.IsSetComment()  &&  !m_Feat.GetComment().empty() ) {
5442         x_AddFTableQual("note", m_Feat.GetComment());
5443     }
5444     if ( m_Feat.IsSetExp_ev() ) {
5445         string ev;
5446         switch ( m_Feat.GetExp_ev() ) {
5447         case CSeq_feat::eExp_ev_experimental:
5448             ev = "experimental";
5449             break;
5450         case CSeq_feat::eExp_ev_not_experimental:
5451             ev = "not_experimental";
5452             break;
5453         }
5454         x_AddFTableQual("evidence", ev);
5455     }
5456     if ( m_Feat.IsSetExcept_text()  &&  !m_Feat.GetExcept_text().empty() ) {
5457         string exception_text = m_Feat.GetExcept_text();
5458         if ( exception_text == "ribosomal slippage" ) {
5459           x_AddFTableQual("ribosomal_slippage");
5460         }
5461         else if ( exception_text == "trans-splicing" ) {
5462           x_AddFTableQual("trans_splicing");
5463         }
5464         else if ( exception_text == "circular RNA" ) {
5465           x_AddFTableQual("circular_RNA");
5466         }
5467         x_AddFTableQual("exception", m_Feat.GetExcept_text());
5468     } else if ( m_Feat.IsSetExcept()  &&  m_Feat.GetExcept() ) {
5469         x_AddFTableQual("exception");
5470     }
5471     const CSeq_feat_Base::TQual & qual = m_Feat.GetQual(); // must store reference since ITERATE macro evaluates 3rd arg multiple times
5472     const bool hide_ids = GetContext()->Config().HideProteinID();
5473     ITERATE( CSeq_feat::TQual, it, qual ) {
5474         const CGb_qual& qual = **it;
5475         const string& key = qual.IsSetQual() ? qual.GetQual() : kEmptyStr;
5476         const string& val = qual.IsSetVal() ? qual.GetVal() : kEmptyStr;
5477         if ( !key.empty()  &&  !val.empty() ) {
5478             if (hide_ids &&
5479                 (key == "protein_id" ||
5480                  key == "orig_protein_id" ||
5481                  key == "transcript_id" ||
5482                  key == "orig_transcript_id"))
5483             {
5484                 continue;
5485             }
5486             x_AddFTableQual(key, val);
5487         }
5488     }
5489     if ( m_Feat.IsSetExt() ) {
5490         x_AddFTableExtQuals(m_Feat.GetExt());
5491     }
5492     if ( data.IsGene() ) {
5493         x_AddFTableDbxref(data.GetGene().GetDb());
5494     } else if ( data.IsProt() ) {
5495         x_AddFTableDbxref(data.GetProt().GetDb());
5496     }
5497     x_AddFTableDbxref(m_Feat.GetDbxref());
5498 }
5499 
5500 //  ----------------------------------------------------------------------------
x_AddFTableExtQuals(const CSeq_feat::TExt & ext)5501 void CFeatureItem::x_AddFTableExtQuals(
5502     const CSeq_feat::TExt& ext )
5503 //  ----------------------------------------------------------------------------
5504 {
5505     ITERATE (CUser_object::TData, it, ext.GetData()) {
5506         const CUser_field& field = **it;
5507         if ( !field.IsSetData() ) {
5508             continue;
5509         }
5510         if ( field.GetData().IsObject() ) {
5511             const CUser_object& obj = field.GetData().GetObject();
5512             x_AddQualsExt(obj);
5513             return;
5514         } else if ( field.GetData().IsObjects() ) {
5515             ITERATE (CUser_field::C_Data::TObjects, o, field.GetData().GetObjects()) {
5516                 x_AddQualsExt(**o);
5517             }
5518             return;
5519         }
5520     }
5521     if ( ext.IsSetType()  &&  ext.GetType().IsStr() ) {
5522         const string& oid = ext.GetType().GetStr();
5523         if ( oid == "GeneOntology" ) {
5524             ITERATE (CUser_object::TData, uf_it, ext.GetData()) {
5525                 const CUser_field& field = **uf_it;
5526                 if ( field.IsSetLabel()  &&  field.GetLabel().IsStr() ) {
5527                     const string& label = field.GetLabel().GetStr();
5528                     string name;
5529                     if ( label == "Process" ) {
5530                         name = "GO_process";
5531                     } else if ( label == "Component" ) {
5532                         name = "GO_component";
5533                     } else if ( label == "Function" ) {
5534                         name = "GO_function";
5535                     }
5536                     if ( name.empty() ) {
5537                         continue;
5538                     }
5539 
5540                     ITERATE (CUser_field::TData::TFields, it, field.GetData().GetFields()) {
5541                         if ( (*it)->GetData().IsFields() ) {
5542                             CFlatGoQVal(**it).Format(m_FTableQuals, name, *GetContext(), 0);;
5543                         }
5544                     }
5545                 }
5546             }
5547         }
5548     }
5549 }
5550 
5551 //  ----------------------------------------------------------------------------
x_AddFTableDbxref(const CSeq_feat::TDbxref & dbxref)5552 void CFeatureItem::x_AddFTableDbxref(
5553     const CSeq_feat::TDbxref& dbxref )
5554 //  ----------------------------------------------------------------------------
5555 {
5556     ITERATE (CSeq_feat::TDbxref, it, dbxref) {
5557         const CDbtag& dbt = **it;
5558         if ( dbt.IsSetDb()  &&  !dbt.GetDb().empty()  &&
5559              dbt.IsSetTag() ) {
5560             const CObject_id& oid = dbt.GetTag();
5561             switch ( oid.Which() ) {
5562             case CObject_id::e_Str:
5563                 if ( !oid.GetStr().empty() ) {
5564                     x_AddFTableQual("db_xref", dbt.GetDb() + ":" + oid.GetStr());
5565                 }
5566                 break;
5567             case CObject_id::e_Id:
5568                 x_AddFTableQual("db_xref", dbt.GetDb() + ":" + NStr::IntToString(oid.GetId()));
5569                 break;
5570             default:
5571                 break;
5572             }
5573         }
5574     }
5575 }
5576 
5577 //  ----------------------------------------------------------------------------
x_AddFTableGeneQuals(const CGene_ref & gene)5578 bool CFeatureItem::x_AddFTableGeneQuals(
5579     const CGene_ref& gene )
5580 //  ----------------------------------------------------------------------------
5581 {
5582     if ( gene.IsSetLocus()  &&  !gene.GetLocus().empty() ) {
5583         x_AddFTableQual("gene", gene.GetLocus(), CFormatQual::eTrim_WhitespaceOnly);
5584     }
5585     if ( gene.IsSetAllele()  &&  !gene.GetAllele().empty() ) {
5586         x_AddFTableQual("allele", gene.GetAllele());
5587     }
5588     ITERATE (CGene_ref::TSyn, it, gene.GetSyn()) {
5589         x_AddFTableQual("gene_syn", *it, CFormatQual::eTrim_WhitespaceOnly);
5590     }
5591     if ( gene.IsSetDesc()  &&  !gene.GetDesc().empty() ) {
5592         x_AddFTableQual("gene_desc", gene.GetDesc());
5593     }
5594     if ( gene.IsSetMaploc()  &&  !gene.GetMaploc().empty() ) {
5595         x_AddFTableQual("map", gene.GetMaploc());
5596     }
5597     if ( gene.IsSetLocus_tag()  &&  !gene.GetLocus_tag().empty() ) {
5598         x_AddFTableQual("locus_tag", gene.GetLocus_tag(), CFormatQual::eTrim_WhitespaceOnly);
5599     }
5600 
5601     return (gene.IsSetPseudo()  &&  gene.GetPseudo());
5602 }
5603 
5604 
x_AddFTableAnticodon(const CTrna_ext & trna_ext,CBioseqContext & ctx)5605 void CFeatureItem::x_AddFTableAnticodon(
5606         const CTrna_ext& trna_ext,
5607         CBioseqContext& ctx)
5608 {
5609 
5610 
5611     if (!trna_ext.IsSetAnticodon()) {
5612         return;
5613     }
5614 
5615     const auto& loc = trna_ext.GetAnticodon();
5616     string pos = CFlatSeqLoc(loc, ctx).GetString();
5617 
5618     string aa;
5619     switch(trna_ext.GetAa().Which()) {
5620     case CTrna_ext::C_Aa::e_Iupacaa:
5621         aa = GetAAName(trna_ext.GetAa().GetIupacaa(), true);
5622         break;
5623     case CTrna_ext::C_Aa::e_Ncbieaa:
5624         aa = GetAAName(trna_ext.GetAa().GetNcbieaa(), true);
5625         break;
5626     case CTrna_ext::C_Aa::e_Ncbi8aa:
5627         aa = GetAAName(trna_ext.GetAa().GetNcbi8aa(), false);
5628         break;
5629     case CTrna_ext::C_Aa::e_Ncbistdaa:
5630         aa = GetAAName(trna_ext.GetAa().GetNcbistdaa(), false);
5631         break;
5632     default:
5633         break;
5634     }
5635 
5636     string seq("---");
5637     try {
5638         CSeqVector seq_vec(loc, ctx.GetScope(), CBioseq_Handle::eCoding_Iupac);
5639         seq_vec.GetSeqData(0, 3, seq);
5640         NStr::ToLower(seq);
5641     }
5642     catch(...)
5643     {}
5644 
5645 
5646     x_AddFTableQual("anticodon", "(pos:" + pos + ",aa:" + aa + ",seq:" + seq + ")");
5647 
5648 }
5649 
5650 //  ----------------------------------------------------------------------------
x_AddFTableRnaQuals(const CMappedFeat & feat,CBioseqContext & ctx)5651 void CFeatureItem::x_AddFTableRnaQuals(
5652     const CMappedFeat& feat,
5653     CBioseqContext& ctx )
5654 //  ----------------------------------------------------------------------------
5655 {
5656     string label;
5657 
5658     if ( !feat.GetData().IsRna() ) {
5659         return;
5660     }
5661     const CFlatFileConfig& cfg = GetContext()->Config();
5662     const CSeqFeatData::TRna& rna = feat.GetData().GetRna();
5663     if (rna.IsSetExt()) {
5664         const CRNA_ref::TExt& ext = rna.GetExt();
5665         if (ext.IsName()) {
5666             if (!ext.GetName().empty()) {
5667                 x_AddFTableQual("product", ext.GetName());
5668             }
5669         } else if (ext.IsTRNA()) {
5670             feature::GetLabel(feat.GetOriginalFeature(), &label,
5671                               feature::fFGL_Content, &ctx.GetScope());
5672             x_AddFTableQual("product", label);
5673             // check for anticodon
5674              x_AddFTableAnticodon(ext.GetTRNA(), ctx);
5675         }
5676         else if ( ext.IsGen() ) {
5677             const CRNA_gen& gen = ext.GetGen();
5678             if ( gen.IsSetClass() ) {
5679                 if ( gen.IsLegalClass()) {
5680                     x_AddFTableQual("ncRNA_class", gen.GetClass());
5681                 }
5682                 else {
5683                     x_AddFTableQual("ncRNA_class", "other");
5684                     x_AddFTableQual("note", gen.GetClass());
5685                 }
5686             }
5687 
5688             if ( gen.IsSetProduct() ) {
5689                 x_AddFTableQual("product", gen.GetProduct());
5690             }
5691         }
5692     }
5693 
5694     if ( feat.IsSetProduct() && !cfg.HideProteinID()) {
5695         CBioseq_Handle prod =
5696             ctx.GetScope().GetBioseqHandle(m_Feat.GetProductId());
5697         if ( prod ) {
5698             string id_str = x_SeqIdWriteForTable(*(prod.GetBioseqCore()), ctx.Config().SuppressLocalId(), !(ctx.Config().HideGI() || ctx.Config().IsPolicyFtp()));
5699             if (!NStr::IsBlank(id_str)) {
5700                 x_AddFTableQual("transcript_id", id_str);
5701             }
5702         }
5703     }
5704 }
5705 
5706 
5707 // originally SeqIdWriteForTable in the C Toolkit
5708 // specific Seq-ids are included in the value, in a specific order
x_SeqIdWriteForTable(const CBioseq & seq,bool suppress_local,bool giOK)5709 string CFeatureItem::x_SeqIdWriteForTable(const CBioseq& seq, bool suppress_local, bool giOK)
5710 
5711 {
5712     if (!seq.IsSetId()) {
5713         return kEmptyStr;
5714     }
5715     const CSeq_id* accn = NULL;
5716     const CSeq_id* local = NULL;
5717     const CSeq_id* general = NULL;
5718     const CSeq_id* gi = NULL;
5719 
5720     ITERATE(CBioseq::TId, it, seq.GetId()) {
5721         switch ((*it)->Which()) {
5722         case CSeq_id::e_Local:
5723             local = it->GetPointer();
5724             break;
5725         case CSeq_id::e_Genbank:
5726         case CSeq_id::e_Embl:
5727         case CSeq_id::e_Pir:
5728         case CSeq_id::e_Swissprot:
5729         case CSeq_id::e_Ddbj:
5730         case CSeq_id::e_Prf:
5731         case CSeq_id::e_Tpg:
5732         case CSeq_id::e_Tpe:
5733         case CSeq_id::e_Tpd:
5734         case CSeq_id::e_Other:
5735         case CSeq_id::e_Gpipe:
5736             accn = it->GetPointer();
5737             break;
5738         case CSeq_id::e_General:
5739             if (!(*it)->GetGeneral().IsSkippable()) {
5740                 general = it->GetPointer();
5741             }
5742             break;
5743         case CSeq_id::e_Gi:
5744             gi = it->GetPointer();
5745             break;
5746         default:
5747             break;
5748         }
5749     }
5750 
5751     string label;
5752 
5753     if (accn != NULL) {
5754         label = accn->AsFastaString();
5755     }
5756 
5757     if (general != NULL) {
5758         if (!label.empty()) {
5759             label += "|";
5760         }
5761         label += general->AsFastaString();
5762     }
5763 
5764     if (local != NULL && (!suppress_local) && label.empty()) {
5765         label = local->AsFastaString();
5766     }
5767 
5768     if (gi != NULL && giOK && label.empty()) {
5769         label = gi->AsFastaString();
5770     }
5771 
5772     return label;
5773 }
5774 
5775 
5776 //  ----------------------------------------------------------------------------
x_AddFTableCdregionQuals(const CMappedFeat & feat,CBioseqContext & ctx)5777 void CFeatureItem::x_AddFTableCdregionQuals(
5778     const CMappedFeat& feat,
5779     CBioseqContext& ctx )
5780 //  ----------------------------------------------------------------------------
5781 {
5782     CBioseq_Handle prod;
5783     const CFlatFileConfig& cfg = GetContext()->Config();
5784     if ( feat.IsSetProduct() ) {
5785         prod = ctx.GetScope().GetBioseqHandle(feat.GetProductId());
5786     }
5787 
5788     const CProt_ref* prot_xref = feat.GetProtXref();
5789     if (prot_xref) {
5790         x_AddFTableProtQuals(*prot_xref);
5791     }
5792     else
5793     if ( prod ) {
5794         CMappedFeat prot_ref = s_GetBestProtFeature(prod);
5795         if ( prot_ref ) {
5796             /// FIXME: we take the first; we want the longest
5797             x_AddFTableProtQuals(prot_ref);
5798         }
5799     }
5800     const CCdregion& cdr = feat.GetData().GetCdregion();
5801     if ( cdr.IsSetFrame()  &&  cdr.GetFrame() > CCdregion::eFrame_one ) {
5802         x_AddFTableQual("codon_start", NStr::IntToString(cdr.GetFrame()));
5803     }
5804     ITERATE (CCdregion::TCode_break, it, cdr.GetCode_break()) {
5805         string pos = CFlatSeqLoc((*it)->GetLoc(), ctx).GetString();
5806         string aa  = "OTHER";
5807         switch ((*it)->GetAa().Which()) {
5808         case CCode_break::C_Aa::e_Ncbieaa:
5809             aa = GetAAName((*it)->GetAa().GetNcbieaa(), true);
5810             break;
5811         case CCode_break::C_Aa::e_Ncbi8aa:
5812             aa = GetAAName((*it)->GetAa().GetNcbi8aa(), false);
5813             break;
5814         case CCode_break::C_Aa::e_Ncbistdaa:
5815             aa = GetAAName((*it)->GetAa().GetNcbistdaa(), false);
5816             break;
5817         default:
5818             break;
5819         }
5820         x_AddFTableQual("transl_except", "(pos:" + pos + ",aa:" + aa + ")");
5821     }
5822 
5823     if (cdr.IsSetCode()) {
5824         int gcode = cdr.GetCode().GetId();
5825         if (gcode > 1 && gcode != 255) {
5826             x_AddFTableQual("transl_table", NStr::NumericToString(gcode));
5827         }
5828     }
5829 
5830     if (prod && !cfg.HideProteinID()) {
5831         string id_str = x_SeqIdWriteForTable(*(prod.GetBioseqCore()), ctx.Config().SuppressLocalId(), !(ctx.Config().HideGI() || ctx.Config().IsPolicyFtp()));
5832         if (!NStr::IsBlank(id_str)) {
5833             x_AddFTableQual("protein_id", id_str);
5834         }
5835     }
5836 }
5837 
5838 //  ----------------------------------------------------------------------------
x_AddFTableProtQuals(const CMappedFeat & prot)5839 void CFeatureItem::x_AddFTableProtQuals(
5840     const CMappedFeat& prot )
5841 //  ----------------------------------------------------------------------------
5842 {
5843     if ( !prot.GetData().IsProt() ) {
5844         return;
5845     }
5846     x_AddFTableProtQuals(prot.GetData().GetProt());
5847 
5848     if ( prot.IsSetComment()  &&  !prot.GetComment().empty() ) {
5849         x_AddFTableQual("prot_note", prot.GetComment());
5850     }
5851 }
5852 
5853 //  ----------------------------------------------------------------------------
x_AddFTableProtQuals(const CProt_ref & prot_ref)5854 void CFeatureItem::x_AddFTableProtQuals(
5855     const CProt_ref& prot_ref)
5856 //  ----------------------------------------------------------------------------
5857 {
5858     ITERATE (CProt_ref::TName, it, prot_ref.GetName()) {
5859         if ( !it->empty() ) {
5860             x_AddFTableQual("product", *it);
5861         }
5862     }
5863     if ( prot_ref.IsSetDesc()  &&  !prot_ref.GetDesc().empty() ) {
5864         x_AddFTableQual("prot_desc", prot_ref.GetDesc());
5865     }
5866     ITERATE (CProt_ref::TActivity, it, prot_ref.GetActivity()) {
5867         if ( !it->empty() ) {
5868             x_AddFTableQual("function", *it);
5869         }
5870     }
5871     ITERATE (CProt_ref::TEc, it, prot_ref.GetEc()) {
5872         if ( !it->empty() ) {
5873             x_AddFTableQual("EC_number", *it);
5874         }
5875     }
5876 }
5877 
5878 //  ----------------------------------------------------------------------------
x_AddFTableRegionQuals(const CSeqFeatData::TRegion & region)5879 void CFeatureItem::x_AddFTableRegionQuals(
5880     const CSeqFeatData::TRegion& region )
5881 //  ----------------------------------------------------------------------------
5882 {
5883     if ( !region.empty() ) {
5884         x_AddFTableQual("region", region);
5885     }
5886 }
5887 
5888 //  ----------------------------------------------------------------------------
x_AddFTableBondQuals(const CSeqFeatData::TBond & bond)5889 void CFeatureItem::x_AddFTableBondQuals(
5890     const CSeqFeatData::TBond& bond )
5891 //  ----------------------------------------------------------------------------
5892 {
5893     x_AddFTableQual("bond_type", s_GetBondName(bond));
5894 }
5895 
5896 //  ----------------------------------------------------------------------------
x_AddFTableSiteQuals(const CSeqFeatData::TSite & site)5897 void CFeatureItem::x_AddFTableSiteQuals(
5898     const CSeqFeatData::TSite& site)
5899 //  ----------------------------------------------------------------------------
5900 {
5901     x_AddFTableQual("site_type", s_GetSiteName(site));
5902 }
5903 
5904 //  ----------------------------------------------------------------------------
x_AddFTablePsecStrQuals(const CSeqFeatData::TPsec_str & psec_str)5905 void CFeatureItem::x_AddFTablePsecStrQuals(
5906     const CSeqFeatData::TPsec_str& psec_str )
5907 //  ----------------------------------------------------------------------------
5908 {
5909     const string& psec = CSeqFeatData::ENUM_METHOD_NAME(EPsec_str)()->FindName(
5910         psec_str, true );
5911     x_AddFTableQual("sec_str_type", psec);
5912 }
5913 
5914 //  ----------------------------------------------------------------------------
x_AddFTablePsecStrQuals(const CSeqFeatData::THet & het)5915 void CFeatureItem::x_AddFTablePsecStrQuals(
5916     const CSeqFeatData::THet& het)
5917 //  ----------------------------------------------------------------------------
5918 {
5919     if ( !het.Get().empty() ) {
5920         x_AddFTableQual("heterogen", het.Get());
5921     }
5922 }
5923 
5924 //  ----------------------------------------------------------------------------
x_AddFTableNonStdQuals(const CSeqFeatData::TNon_std_residue & res)5925 void CFeatureItem::x_AddFTableNonStdQuals(
5926     const CSeqFeatData::TNon_std_residue& res )
5927 //  ----------------------------------------------------------------------------
5928 {
5929     if ( !res.empty() ) {
5930         x_AddFTableQual("non_std_residue", res);
5931     }
5932 }
5933 
5934 
s_GetSubtypeString(const COrgMod::TSubtype & subtype)5935 static const string s_GetSubtypeString(const COrgMod::TSubtype& subtype)
5936 {
5937     switch ( subtype ) {
5938         case COrgMod::eSubtype_strain:           return "strain";
5939         case COrgMod::eSubtype_substrain:        return "substrain";
5940         case COrgMod::eSubtype_type:             return "type";
5941         case COrgMod::eSubtype_subtype:          return "subtype";
5942         case COrgMod::eSubtype_variety:          return "variety";
5943         case COrgMod::eSubtype_serotype:         return "serotype";
5944         case COrgMod::eSubtype_serogroup:        return "serogroup";
5945         case COrgMod::eSubtype_serovar:          return "serovar";
5946         case COrgMod::eSubtype_cultivar:         return "cultivar";
5947         case COrgMod::eSubtype_pathovar:         return "pathovar";
5948         case COrgMod::eSubtype_chemovar:         return "chemovar";
5949         case COrgMod::eSubtype_biovar:           return "biovar";
5950         case COrgMod::eSubtype_biotype:          return "biotype";
5951         case COrgMod::eSubtype_group:            return "group";
5952         case COrgMod::eSubtype_subgroup:         return "subgroup";
5953         case COrgMod::eSubtype_isolate:          return "isolate";
5954         case COrgMod::eSubtype_common:           return "common";
5955         case COrgMod::eSubtype_acronym:          return "acronym";
5956         case COrgMod::eSubtype_dosage:           return "dosage";
5957         case COrgMod::eSubtype_nat_host:         return "nat_host";
5958         case COrgMod::eSubtype_sub_species:      return "sub_species";
5959         case COrgMod::eSubtype_specimen_voucher: return "specimen_voucher";
5960         case COrgMod::eSubtype_authority:        return "authority";
5961         case COrgMod::eSubtype_forma:            return "forma";
5962         case COrgMod::eSubtype_forma_specialis:  return "dosage";
5963         case COrgMod::eSubtype_ecotype:          return "ecotype";
5964         case COrgMod::eSubtype_synonym:          return "synonym";
5965         case COrgMod::eSubtype_anamorph:         return "anamorph";
5966         case COrgMod::eSubtype_teleomorph:       return "teleomorph";
5967         case COrgMod::eSubtype_breed:            return "breed";
5968         case COrgMod::eSubtype_gb_acronym:       return "gb_acronym";
5969         case COrgMod::eSubtype_gb_anamorph:      return "gb_anamorph";
5970         case COrgMod::eSubtype_gb_synonym:       return "gb_synonym";
5971         case COrgMod::eSubtype_old_lineage:      return "old_lineage";
5972         case COrgMod::eSubtype_old_name:         return "old_name";
5973         case COrgMod::eSubtype_culture_collection: return "culture_collection";
5974         case COrgMod::eSubtype_bio_material:     return "bio_material";
5975         case COrgMod::eSubtype_metagenome_source: return "metagenome_source";
5976         case COrgMod::eSubtype_type_material:    return "type_material";
5977         case COrgMod::eSubtype_other:            return "note";
5978         default:                                 return kEmptyStr;
5979     }
5980     return kEmptyStr;
5981 }
5982 
5983 
s_GetSubsourceString(const CSubSource::TSubtype & subtype)5984 static const string s_GetSubsourceString(const CSubSource::TSubtype& subtype)
5985 {
5986     switch ( subtype ) {
5987         case CSubSource::eSubtype_chromosome: return "chromosome";
5988         case CSubSource::eSubtype_map: return "map";
5989         case CSubSource::eSubtype_clone: return "clone";
5990         case CSubSource::eSubtype_subclone: return "subclone";
5991         case CSubSource::eSubtype_haplogroup: return "haplogroup";
5992         case CSubSource::eSubtype_haplotype: return "haplotype";
5993         case CSubSource::eSubtype_genotype: return "genotype";
5994         case CSubSource::eSubtype_sex: return "sex";
5995         case CSubSource::eSubtype_cell_line: return "cell_line";
5996         case CSubSource::eSubtype_cell_type: return "cell_type";
5997         case CSubSource::eSubtype_tissue_type: return "tissue_type";
5998         case CSubSource::eSubtype_clone_lib: return "clone_lib";
5999         case CSubSource::eSubtype_dev_stage: return "dev_stage";
6000         case CSubSource::eSubtype_frequency: return "frequency";
6001         case CSubSource::eSubtype_germline: return "germline";
6002         case CSubSource::eSubtype_rearranged: return "rearranged";
6003         case CSubSource::eSubtype_lab_host: return "lab_host";
6004         case CSubSource::eSubtype_pop_variant: return "pop_variant";
6005         case CSubSource::eSubtype_tissue_lib: return "tissue_lib";
6006         case CSubSource::eSubtype_plasmid_name: return "plasmid_name";
6007         case CSubSource::eSubtype_transposon_name: return "transposon_name";
6008         case CSubSource::eSubtype_insertion_seq_name: return "insertion_seq_name";
6009         case CSubSource::eSubtype_plastid_name: return "plastid_name";
6010         case CSubSource::eSubtype_country: return "country";
6011         case CSubSource::eSubtype_segment: return "segment";
6012         case CSubSource::eSubtype_endogenous_virus_name: return "endogenous_virus_name";
6013         case CSubSource::eSubtype_transgenic: return "transgenic";
6014         case CSubSource::eSubtype_environmental_sample: return "environmental_sample";
6015         case CSubSource::eSubtype_isolation_source: return "isolation_source";
6016         case CSubSource::eSubtype_other: return "note";
6017         default: return kEmptyStr;
6018     }
6019     return kEmptyStr;
6020 }
6021 
6022 //  ----------------------------------------------------------------------------
x_AddFTableBiosrcQuals(const CBioSource & src)6023 void CFeatureItem::x_AddFTableBiosrcQuals(
6024     const CBioSource& src )
6025 //  ----------------------------------------------------------------------------
6026 {
6027     if ( src.IsSetOrg() ) {
6028         const CBioSource::TOrg& org = src.GetOrg();
6029 
6030         if ( org.IsSetTaxname()  &&  !org.GetTaxname().empty() ) {
6031             x_AddFTableQual("organism", org.GetTaxname());
6032         }
6033 
6034         if ( org.IsSetOrgname() ) {
6035             ITERATE (COrgName::TMod, it, org.GetOrgname().GetMod()) {
6036                 if ( (*it)->IsSetSubtype() ) {
6037                     string str = s_GetSubtypeString((*it)->GetSubtype());
6038                     if ( str.empty() ) {
6039                         continue;
6040                     }
6041                     if ( (*it)->IsSetSubname()  &&  !(*it)->GetSubname().empty() ) {
6042                         str += (*it)->GetSubname();
6043                     }
6044                     x_AddFTableQual(str);
6045                 }
6046             }
6047         }
6048     }
6049 
6050     ITERATE (CBioSource::TSubtype, it, src.GetSubtype()) {
6051         if ( (*it)->IsSetSubtype() ) {
6052             string str = s_GetSubsourceString((*it)->GetSubtype());
6053             if ( str.empty() ) {
6054                 continue;
6055             }
6056             if ( (*it)->IsSetName() ) {
6057                 str += (*it)->GetName();
6058             }
6059             x_AddFTableQual(str);
6060         }
6061     }
6062 }
6063 
6064 
6065 /////////////////////////////////////////////////////////////////////////////
6066 //   Source Feature
6067 /////////////////////////////////////////////////////////////////////////////
6068 
CSourceFeatureItem(const CMappedFeat & feat,CBioseqContext & ctx,CRef<feature::CFeatTree> ftree,const CSeq_loc * loc)6069 CSourceFeatureItem::CSourceFeatureItem
6070 (const CMappedFeat& feat,
6071  CBioseqContext& ctx,
6072  CRef<feature::CFeatTree> ftree,
6073  const CSeq_loc* loc)
6074     : CFeatureItemBase(feat, ctx, ftree, loc ? loc : &feat.GetLocation()),
6075       m_WasDesc(false), m_IsFocus(false), m_IsSynthetic(false)
6076 {
6077     x_GatherInfo(ctx);
6078 }
6079 
6080 
GetItemType(void) const6081 IFlatItem::EItem CSourceFeatureItem::GetItemType(void) const
6082 {
6083     return eItem_SourceFeat;
6084 }
6085 
x_GatherInfo(CBioseqContext & ctx)6086 void CSourceFeatureItem::x_GatherInfo(CBioseqContext& ctx)
6087 {
6088     const CBioSource& bsrc = GetSource();
6089     if (!bsrc.IsSetOrg()) {
6090         m_Feat = CMappedFeat();
6091         x_SetSkip();
6092         return;
6093     }
6094 
6095     m_IsFocus = bsrc.IsSetIs_focus();
6096     if (bsrc.GetOrigin() == CBioSource::eOrigin_synthetic) {
6097         m_IsSynthetic = true;
6098     }
6099     if (!m_IsSynthetic  &&  bsrc.GetOrg().IsSetOrgname()) {
6100         m_IsSynthetic = bsrc.GetOrg().GetOrgname().IsSetDiv()  &&
6101             NStr::EqualNocase(bsrc.GetOrg().GetOrgname().GetDiv(), "SYN");
6102     }
6103     if (!m_IsSynthetic  &&  bsrc.IsSetOrg() && bsrc.GetOrg().IsSetTaxname()) {
6104         if (NStr::EqualNocase(bsrc.GetOrg().GetTaxname(), "synthetic construct")) {
6105             m_IsSynthetic = true;
6106         }
6107     }
6108     x_AddQuals(ctx);
6109 }
6110 
6111 
x_AddQuals(CBioseqContext & ctx)6112 void CSourceFeatureItem::x_AddQuals(CBioseqContext& ctx)
6113 {
6114     const CSeqFeatData& data = m_Feat.GetData();
6115     _ASSERT(data.IsOrg()  ||  data.IsBiosrc());
6116     // add various generic qualifiers...
6117     x_AddQual(eSQ_mol_type,
6118               new CFlatMolTypeQVal(ctx.GetBiomol(), ctx.GetMol()));
6119     x_AddQual(eSQ_submitter_seqid,
6120               new CFlatSubmitterSeqidQVal(ctx.GetTech()));
6121     if (m_Feat.IsSetComment()) {
6122         x_AddQual(eSQ_seqfeat_note, new CFlatStringQVal(m_Feat.GetComment()));
6123     }
6124     if (m_Feat.IsSetTitle()) {
6125         x_AddQual(eSQ_label, new CFlatLabelQVal(m_Feat.GetTitle()));
6126     }
6127     if (m_Feat.IsSetCit()) {
6128         x_AddQual(eSQ_citation, new CFlatPubSetQVal(m_Feat.GetCit()));
6129     }
6130     if (m_Feat.IsSetDbxref()) {
6131         x_AddQual(eSQ_org_xref, new CFlatXrefQVal(m_Feat.GetDbxref()));
6132     }
6133 
6134     // add qualifiers from biosource fields
6135     x_AddQuals(data.GetBiosrc(), ctx);
6136 }
6137 
6138 
s_OrgModToSlot(const COrgMod & om)6139 static ESourceQualifier s_OrgModToSlot(const COrgMod& om)
6140 {
6141     return GetSourceQualOfOrgMod( static_cast<COrgMod::ESubtype>(om.GetSubtype()) );
6142 }
6143 
s_GetSpecimenVoucherText(CBioseqContext & ctx,const string & strRawName)6144 static string s_GetSpecimenVoucherText(
6145     CBioseqContext& ctx,
6146     const string& strRawName )
6147 {
6148     if ( ! ctx.Config().DoHTML() ) {
6149         return strRawName;
6150     }
6151 
6152     // doesn't COrgMod already have the code for this?
6153     string inst;
6154     string coll;
6155     string id;
6156     {
6157         if( ! COrgMod::ParseStructuredVoucher(strRawName, inst, coll, id) || NStr::IsBlank(inst)) {
6158             return strRawName;
6159         }
6160         if( ! coll.empty() ) {
6161             inst += ':' + coll;
6162         }
6163     }
6164 
6165     CInstInfoMap::TVoucherInfoRef voucher_info_ref = CInstInfoMap::GetInstitutionVoucherInfo( inst );
6166     if( voucher_info_ref ) {
6167         CNcbiOstrstream text;
6168 
6169         string inst_full_name =   COrgMod::GetInstitutionFullName( inst );
6170         if (inst_full_name.empty()) {
6171             inst_full_name = voucher_info_ref->m_InstFullName;
6172         }
6173         text << "<acronym title=\""
6174              << NStr::Replace(inst_full_name, "\"", "&quot;")
6175              << "\" class=\"voucher\">"
6176              << inst << "</acronym>"
6177              << ":"
6178              << "<a href=\"" << *voucher_info_ref->m_Links;
6179 
6180         if( voucher_info_ref->m_PrependInstitute) {
6181             text << inst;
6182         }
6183         if( voucher_info_ref->m_PrependCollection) {
6184             text << coll;
6185         }
6186         if( voucher_info_ref->m_Prefix != NULL ) {
6187             text << *voucher_info_ref->m_Prefix;
6188         }
6189         if( voucher_info_ref->m_Trim != NULL ) {
6190             const string& trim = *voucher_info_ref->m_Trim;
6191             if (NStr::StartsWith(id, trim)) {
6192                 NStr::TrimPrefixInPlace(id, trim);
6193                 NStr::TruncateSpacesInPlace(id);
6194             }
6195         }
6196         if( voucher_info_ref->m_PadTo > 0 && voucher_info_ref->m_PadWith != NULL) {
6197             int len_id = id.length();
6198             int len_pad = voucher_info_ref->m_PadWith->length();
6199             while (len_id < voucher_info_ref->m_PadTo) {
6200                 text << *voucher_info_ref->m_PadWith;
6201                 len_id += len_pad;
6202             }
6203         }
6204         text << id;
6205         if( voucher_info_ref->m_Suffix ) {
6206             text << *voucher_info_ref->m_Suffix;
6207         }
6208         text << "\">" << id << "</a>";
6209         return CNcbiOstrstreamToString(text);
6210     } else {
6211         // fall back on at least getting institution name
6212         const string &inst_full_name =  COrgMod::GetInstitutionFullName( inst );
6213         if( ! inst_full_name.empty() ) {
6214             CNcbiOstrstream text;
6215 
6216             text << "<acronym title=\"" << NStr::Replace(inst_full_name, "\"", "&quot;") << "\" class=\"voucher\">"
6217                 << inst << "</acronym>"
6218                 << ":" << id;
6219 
6220             return CNcbiOstrstreamToString(text);
6221         } else {
6222             // if all else fails, return the string we were initially given
6223             return strRawName;
6224         }
6225     }
6226 }
6227 
6228 
x_AddQuals(const COrg_ref & org,CBioseqContext & ctx) const6229 void CSourceFeatureItem::x_AddQuals(const COrg_ref& org, CBioseqContext& ctx) const
6230 {
6231     CTempString taxname;
6232     CTempString common;
6233     if ( org.IsSetTaxname() ) {
6234         taxname = org.GetTaxname();
6235     }
6236     if ( taxname.empty()  &&  ctx.Config().NeedOrganismQual() ) {
6237         taxname = "unknown";
6238         if ( org.IsSetCommon() ) {
6239             common = org.GetCommon();
6240         }
6241     }
6242     if ( !taxname.empty() ) {
6243         x_AddQual(eSQ_organism, new CFlatStringQVal(taxname));
6244     }
6245     if ( !common.empty() ) {
6246         x_AddQual(eSQ_common_name, new CFlatStringQVal(common));
6247     }
6248     if ( org.IsSetOrgname() ) {
6249         set<CTempString> ecotypesSeen;  // holds the ones we've seen so don't show them again
6250         ecotypesSeen.insert(kEmptyStr); // empty string is always considered seen so we hide it
6251         ITERATE (COrgName::TMod, it, org.GetOrgname().GetMod()) {
6252 
6253             const COrgMod& mod = **it;
6254             const string & sSubname = (
6255                 mod.CanGetSubname() ? mod.GetSubname() : kEmptyStr );
6256 
6257             ESourceQualifier slot = s_OrgModToSlot(**it);
6258             switch( slot ) {
6259             case eSQ_ecotype:
6260                 if( ecotypesSeen.find(sSubname) != ecotypesSeen.end() ) {
6261                     break; // already seen
6262                 }
6263                 ecotypesSeen.insert( sSubname );
6264                 x_AddQual(slot, new CFlatOrgModQVal(mod));
6265                 break;
6266             case eSQ_none:
6267                 break;
6268             default:
6269                 {
6270                     const COrgMod::TSubtype stype = mod.GetSubtype();
6271                     if( COrgMod::HoldsInstitutionCode(stype) ) {
6272                         CRef<COrgMod> new_mod( new COrgMod(stype,
6273                             (  sSubname.empty() ? kEmptyStr : s_GetSpecimenVoucherText(ctx, sSubname) ) ));
6274                         x_AddQual(slot, new CFlatOrgModQVal(*new_mod));
6275                     } else if (stype == COrgMod::eSubtype_type_material && (! COrgMod::IsINSDCValidTypeMaterial(sSubname))) {
6276                         CRef<COrgMod> new_mod( new COrgMod(COrgMod::eSubtype_other,
6277                             (  sSubname.empty() ? kEmptyStr : "type_material: " + sSubname ) ));
6278                         x_AddQual(eSQ_orgmod_note, new CFlatOrgModQVal(*new_mod));
6279                     } else {
6280                             x_AddQual(slot, new CFlatOrgModQVal(**it));
6281                 }
6282                 }
6283                 break;
6284             }
6285         }
6286     }
6287     if (!WasDesc()  &&  org.IsSetMod()) {
6288         x_AddQual(eSQ_unstructured, new CFlatStringListQVal(org.GetMod()));
6289     }
6290     if ( org.IsSetDb() ) {
6291         x_AddQual(eSQ_db_xref, new CFlatXrefQVal(org.GetDb()));
6292     }
6293 }
6294 
x_AddPcrPrimersQuals(const CBioSource & src,CBioseqContext & ctx) const6295 void CSourceFeatureItem::x_AddPcrPrimersQuals(const CBioSource& src, CBioseqContext& ctx) const
6296 {
6297     if( ! src.IsSetPcr_primers() ) {
6298         return;
6299     }
6300 
6301     const CBioSource_Base::TPcr_primers & primers = src.GetPcr_primers();
6302     if( primers.CanGet() ) {
6303         ITERATE( CBioSource_Base::TPcr_primers::Tdata, it, primers.Get() ) {
6304             string primer_value;
6305 
6306             bool has_fwd_seq = false;
6307             bool has_rev_seq = false;
6308 
6309             if( (*it)->IsSetForward() ) {
6310                 const CPCRReaction_Base::TForward &forward = (*it)->GetForward();
6311                 if( forward.CanGet() ) {
6312                     ITERATE( CPCRReaction_Base::TForward::Tdata, it2, forward.Get() ) {
6313                         const string &fwd_name = ( (*it2)->CanGetName() ? (*it2)->GetName().Get() : kEmptyStr );
6314                         if( ! fwd_name.empty() ) {
6315                             s_AddPcrPrimersQualsAppend( primer_value, "fwd_name: ", fwd_name);
6316                         }
6317                         const string &fwd_seq = ( (*it2)->CanGetSeq() ? (*it2)->GetSeq().Get() : kEmptyStr );
6318                         // NStr::ToLower( fwd_seq );
6319                         if( ! fwd_seq.empty() ) {
6320                             s_AddPcrPrimersQualsAppend( primer_value, "fwd_seq: ", fwd_seq);
6321                             has_fwd_seq = true;
6322                         }
6323                     }
6324                 }
6325             }
6326             if( (*it)->IsSetReverse() ) {
6327                 const CPCRReaction_Base::TReverse &reverse = (*it)->GetReverse();
6328                 if( reverse.CanGet() ) {
6329                     ITERATE( CPCRReaction_Base::TReverse::Tdata, it2, reverse.Get() ) {
6330                         const string &rev_name = ((*it2)->CanGetName() ? (*it2)->GetName().Get() : kEmptyStr );
6331                         if( ! rev_name.empty() ) {
6332                             s_AddPcrPrimersQualsAppend( primer_value, "rev_name: ", rev_name);
6333                         }
6334                         const string &rev_seq = ( (*it2)->CanGetSeq() ? (*it2)->GetSeq().Get() : kEmptyStr );
6335                         // NStr::ToLower( rev_seq ); // do we need this?
6336                         if( ! rev_seq.empty() ) {
6337                             s_AddPcrPrimersQualsAppend( primer_value, "rev_seq: ", rev_seq);
6338                             has_rev_seq = true;
6339                         }
6340                     }
6341                 }
6342             }
6343 
6344             if( ! primer_value.empty() ) {
6345                 const bool is_in_note = ( ! has_fwd_seq || ! has_rev_seq );
6346                 if( is_in_note ) {
6347                     primer_value = "PCR_primers=" + primer_value;
6348                 }
6349                 const ESourceQualifier srcQual = ( is_in_note ? eSQ_pcr_primer_note : eSQ_PCR_primers );
6350                 x_AddQual( srcQual, new CFlatStringQVal( primer_value ) );
6351             }
6352         }
6353     }
6354 }
6355 
s_SubSourceToSlot(const CSubSource & ss)6356 static ESourceQualifier s_SubSourceToSlot(const CSubSource& ss)
6357 {
6358     return GetSourceQualOfSubSource( static_cast<CSubSource::ESubtype>(ss.GetSubtype()) );
6359 }
6360 
x_AddQuals(const CBioSource & src,CBioseqContext & ctx) const6361 void CSourceFeatureItem::x_AddQuals(const CBioSource& src, CBioseqContext& ctx) const
6362 {
6363     // add qualifiers from Org_ref field
6364     if ( src.IsSetOrg() ) {
6365         x_AddQuals(src.GetOrg(), ctx);
6366     }
6367     x_AddQual(eSQ_focus, new CFlatBoolQVal(src.IsSetIs_focus()));
6368 
6369 
6370     bool insertion_seq_name = false,
6371          plasmid_name = false,
6372          transposon_name = false;
6373 
6374     ITERATE (CBioSource::TSubtype, it, src.GetSubtype()) {
6375         ESourceQualifier slot = s_SubSourceToSlot(**it);
6376 
6377         switch( slot ) {
6378 
6379         case eSQ_insertion_seq_name:
6380             insertion_seq_name = true;
6381             x_AddQual(slot, new CFlatSubSourceQVal(**it));
6382             break;
6383 
6384         case eSQ_plasmid_name:
6385             plasmid_name = true;
6386             x_AddQual(slot, new CFlatSubSourceQVal(**it));
6387             break;
6388 
6389         case eSQ_transposon_name:
6390             transposon_name = true;
6391             x_AddQual(slot, new CFlatSubSourceQVal(**it));
6392             break;
6393 
6394         case eSQ_metagenomic:
6395             x_AddQual( eSQ_metagenomic, new CFlatStringQVal( "metagenomic") );
6396             break;
6397 
6398         default:
6399             if (slot != eSQ_none) {
6400                 x_AddQual(slot, new CFlatSubSourceQVal(**it));
6401             }
6402             break;
6403         }
6404     }
6405 
6406     // Gets direct "pcr-primers" tag from file and adds the quals from that
6407     x_AddPcrPrimersQuals(src, ctx);
6408 
6409     // some qualifiers are flags in genome and names in subsource,
6410     // print once with name
6411     CBioSource::TGenome genome = src.GetGenome();
6412     CRef<CFlatOrganelleQVal> organelle(new CFlatOrganelleQVal(genome));
6413     if ( (insertion_seq_name  &&  genome == CBioSource::eGenome_insertion_seq)  ||
6414          (plasmid_name  &&  genome == CBioSource::eGenome_plasmid)  ||
6415          (transposon_name  &&  genome == CBioSource::eGenome_transposon) ) {
6416         organelle.Reset();
6417     }
6418     if ( organelle ) {
6419         x_AddQual(eSQ_organelle, organelle);
6420     }
6421 
6422     if ( !WasDesc()  &&  m_Feat.IsSetComment() ) {
6423         x_AddQual(eSQ_seqfeat_note, new CFlatStringQVal(m_Feat.GetComment()));
6424     }
6425 }
6426 
6427 
x_FormatQuals(CFlatFeature & ff) const6428 void CSourceFeatureItem::x_FormatQuals(CFlatFeature& ff) const
6429 {
6430     ff.SetQuals().reserve(m_Quals.Size());
6431     CFlatFeature::TQuals& qvec = ff.SetQuals();
6432 
6433 #define DO_QUAL(x) x_FormatQual(eSQ_##x, GetStringOfSourceQual(eSQ_##x), qvec)
6434     DO_QUAL(organism);
6435 
6436     DO_QUAL(organelle);
6437 
6438     DO_QUAL(mol_type);
6439 
6440     DO_QUAL(submitter_seqid);
6441 
6442     DO_QUAL(strain);
6443     DO_QUAL(substrain);
6444     DO_QUAL(variety);
6445     DO_QUAL(serotype);
6446     DO_QUAL(serovar);
6447     DO_QUAL(cultivar);
6448     DO_QUAL(isolate);
6449     DO_QUAL(isolation_source);
6450     DO_QUAL(spec_or_nat_host);
6451     DO_QUAL(sub_species);
6452 
6453     DO_QUAL(specimen_voucher);
6454     DO_QUAL(culture_collection);
6455     DO_QUAL(bio_material);
6456 
6457     DO_QUAL(type_material);
6458 
6459     DO_QUAL(db_xref);
6460     DO_QUAL(org_xref);
6461 
6462     DO_QUAL(chromosome);
6463 
6464     DO_QUAL(segment);
6465 
6466     DO_QUAL(map);
6467     DO_QUAL(clone);
6468     DO_QUAL(subclone);
6469     DO_QUAL(haplotype);
6470     DO_QUAL(haplogroup);
6471     DO_QUAL(sex);
6472     DO_QUAL(mating_type);
6473     DO_QUAL(cell_line);
6474     DO_QUAL(cell_type);
6475     DO_QUAL(tissue_type);
6476     DO_QUAL(clone_lib);
6477     DO_QUAL(dev_stage);
6478     DO_QUAL(ecotype);
6479 
6480     if( ! GetContext()->Config().FrequencyToNote() ) {
6481         DO_QUAL(frequency);
6482     }
6483 
6484     DO_QUAL(germline);
6485     DO_QUAL(rearranged);
6486     DO_QUAL(transgenic);
6487     DO_QUAL(environmental_sample);
6488 
6489     DO_QUAL(lab_host);
6490     DO_QUAL(pop_variant);
6491     DO_QUAL(tissue_lib);
6492 
6493     DO_QUAL(plasmid_name);
6494     DO_QUAL(mobile_element);
6495     DO_QUAL(transposon_name);
6496     DO_QUAL(insertion_seq_name);
6497 
6498     DO_QUAL(country);
6499 
6500     DO_QUAL(focus);
6501 
6502     DO_QUAL(lat_lon);
6503     DO_QUAL(altitude);
6504     DO_QUAL(collection_date);
6505     DO_QUAL(collected_by);
6506     DO_QUAL(identified_by);
6507     DO_QUAL(PCR_primers);
6508     DO_QUAL(metagenome_source);
6509 
6510     if ( !GetContext()->Config().SrcQualsToNote() ) {
6511         // some note qualifiers appear as regular quals in GBench or Dump mode
6512         x_FormatGBNoteQuals(ff);
6513     }
6514 
6515     DO_QUAL(sequenced_mol);
6516     DO_QUAL(label);
6517     DO_QUAL(usedin);
6518     // DO_QUAL(citation);
6519 #undef DO_QUAL
6520 
6521     // Format the rest of the note quals (ones that weren't formatted above)
6522     // as a single note qualifier
6523     x_FormatNoteQuals(ff);
6524 }
6525 
6526 
x_FormatGBNoteQuals(CFlatFeature & ff) const6527 void CSourceFeatureItem::x_FormatGBNoteQuals(CFlatFeature& ff) const
6528 {
6529     _ASSERT(!GetContext()->Config().SrcQualsToNote());
6530     CFlatFeature::TQuals& qvec = ff.SetQuals();
6531 
6532 #define DO_QUAL(x) x_FormatQual(eSQ_##x, GetStringOfSourceQual(eSQ_##x), qvec)
6533     DO_QUAL(metagenomic);
6534     DO_QUAL(linkage_group);
6535 
6536     DO_QUAL(type);
6537     DO_QUAL(subtype);
6538     DO_QUAL(serogroup);
6539     DO_QUAL(pathovar);
6540     DO_QUAL(chemovar);
6541     DO_QUAL(biovar);
6542     DO_QUAL(biotype);
6543     DO_QUAL(group);
6544     DO_QUAL(subgroup);
6545     DO_QUAL(common);
6546     DO_QUAL(acronym);
6547     DO_QUAL(dosage);
6548 
6549     DO_QUAL(authority);
6550     DO_QUAL(forma);
6551     DO_QUAL(forma_specialis);
6552     DO_QUAL(synonym);
6553     DO_QUAL(anamorph);
6554     DO_QUAL(teleomorph);
6555     DO_QUAL(breed);
6556     if( GetContext()->Config().FrequencyToNote() ) {
6557         DO_QUAL(frequency);
6558     }
6559 
6560 //    DO_QUAL(metagenome_source),
6561 //    DO_QUAL(collection_date);
6562 //    DO_QUAL(collected_by);
6563 //    DO_QUAL(identified_by);
6564 //    DO_QUAL(pcr_primer);
6565     DO_QUAL(genotype);
6566     DO_QUAL(plastid_name);
6567 
6568     DO_QUAL(endogenous_virus_name);
6569 
6570     DO_QUAL(zero_orgmod);
6571     DO_QUAL(one_orgmod);
6572     DO_QUAL(zero_subsrc);
6573 #undef DO_QUAL
6574 }
6575 
6576 
6577 /*
6578 static bool s_IsExactAndNonExactMatchOnNoteQuals(CFlatFeature::TQuals& qvec, const string& str)
6579 {
6580     if (qvec.empty()) {
6581         return false;
6582     }
6583 
6584     int has_exact = 0;
6585     int non_exact = 0;
6586 
6587     CFlatFeature::TQuals::iterator it = qvec.begin();
6588     while (it != qvec.end()) {
6589         const string& val = (*it)->GetValue();
6590         if (NStr::Find(val, str) != NPOS) {
6591           if (NStr::Equal(val, str)) {
6592             has_exact++;
6593           } else {
6594             non_exact++;
6595           }
6596         }
6597         ++it;
6598     }
6599 
6600     if (has_exact == 1 && non_exact > 0) return true;
6601     return false;
6602 }
6603 */
6604 
6605 
6606 
x_FormatNoteQuals(CFlatFeature & ff) const6607 void CSourceFeatureItem::x_FormatNoteQuals(CFlatFeature& ff) const
6608 {
6609     CFlatFeature::TQuals qvec;
6610     bool add_period = false;
6611 
6612 #define DO_NOTE(x) x_FormatNoteQual(eSQ_##x, #x, qvec)
6613     if (m_WasDesc) {
6614         x_FormatNoteQual(eSQ_seqfeat_note, "note", qvec);
6615         DO_NOTE(orgmod_note);
6616         DO_NOTE(subsource_note);
6617     } else {
6618         DO_NOTE(unstructured);
6619     }
6620 
6621     if ( GetContext()->Config().SrcQualsToNote() ) {
6622         DO_NOTE(metagenomic);
6623         DO_NOTE(linkage_group);
6624         DO_NOTE(type);
6625         DO_NOTE(subtype);
6626         DO_NOTE(serogroup);
6627         DO_NOTE(pathovar);
6628         DO_NOTE(chemovar);
6629         DO_NOTE(biovar);
6630         DO_NOTE(biotype);
6631         DO_NOTE(group);
6632         DO_NOTE(subgroup);
6633         DO_NOTE(common);
6634         DO_NOTE(acronym);
6635         DO_NOTE(dosage);
6636 
6637         DO_NOTE(authority);
6638         DO_NOTE(forma);
6639         DO_NOTE(forma_specialis);
6640         DO_NOTE(synonym);
6641         DO_NOTE(anamorph);
6642         DO_NOTE(teleomorph);
6643         DO_NOTE(breed);
6644         if( GetContext()->Config().FrequencyToNote() ) {
6645             DO_NOTE(frequency);
6646         }
6647 
6648         /*
6649         if (s_IsExactAndNonExactMatchOnNoteQuals(qvec, "metagenomic")) {
6650             x_FormatNoteQual(eSQ_metagenome_source, "metagenomic; derived from metagenome", qvec);
6651         } else {
6652             x_FormatNoteQual(eSQ_metagenome_source, "derived from metagenome", qvec);
6653         }
6654         */
6655 
6656         DO_NOTE(genotype);
6657         x_FormatNoteQual(eSQ_plastid_name, "plastid", qvec);
6658 
6659         x_FormatNoteQual(eSQ_endogenous_virus_name, "endogenous_virus", qvec);
6660     }
6661     DO_NOTE(pcr_primer_note);
6662 
6663     if (!m_WasDesc) {
6664         x_FormatNoteQual(eSQ_seqfeat_note, "note", qvec);
6665         DO_NOTE(orgmod_note);
6666         DO_NOTE(subsource_note);
6667     }
6668 
6669     x_FormatNoteQual(eSQ_common_name, "common", qvec);
6670 
6671     if ( GetContext()->Config().SrcQualsToNote() ) {
6672         x_FormatNoteQual(eSQ_zero_orgmod, "?", qvec);
6673         x_FormatNoteQual(eSQ_one_orgmod,  "?", qvec);
6674         x_FormatNoteQual(eSQ_zero_subsrc, "?", qvec);
6675     }
6676 #undef DO_NOTE
6677 
6678     string notestr;
6679     string suffix;
6680 
6681     if ( GetSource().IsSetGenome()  &&
6682         GetSource().GetGenome() == CBioSource::eGenome_extrachrom ) {
6683         static const string kEOL = "\n";
6684         notestr += "extrachromosomal";
6685         suffix = kEOL;
6686     }
6687 
6688     s_QualVectorToNote(qvec, true, notestr, suffix, add_period);
6689     s_NoteFinalize(add_period, notestr, ff, eTilde_note);
6690 }
6691 
6692 
CSourceFeatureItem(const CBioSource & src,TRange range,CBioseqContext & ctx,CRef<feature::CFeatTree> ftree)6693 CSourceFeatureItem::CSourceFeatureItem
6694 (const CBioSource& src,
6695  TRange range,
6696  CBioseqContext& ctx,
6697  CRef<feature::CFeatTree> ftree)
6698     : CFeatureItemBase(CMappedFeat(), ctx, ftree),
6699       m_WasDesc(true), m_IsFocus(false), m_IsSynthetic(false)
6700 {
6701     if (!src.IsSetOrg()) {
6702         m_Feat = CMappedFeat();
6703         x_SetSkip();
6704         return;
6705     }
6706     x_SetObject(src);
6707 
6708     /// We build a fake BioSource feature - even for a source descriptor
6709     CRef<CSeq_feat> feat(new CSeq_feat);
6710     feat->SetData().SetBiosrc(const_cast<CBioSource&>(src));
6711     if ( range.IsWhole() ) {
6712         feat->SetLocation().SetWhole(*ctx.GetPrimaryId());
6713     } else {
6714         CSeq_interval& ival = feat->SetLocation().SetInt();
6715         ival.SetFrom(range.GetFrom());
6716         ival.SetTo(range.GetTo());
6717         ival.SetId(*ctx.GetPrimaryId());
6718     }
6719 
6720     CRef<CSeq_annot> an(new CSeq_annot);
6721     an->SetData().SetFtable().push_back(feat);
6722 
6723     CRef<CScope> local_scope(new CScope(*CObjectManager::GetInstance()));
6724     CSeq_annot_Handle sah = local_scope->AddSeq_annot(*an);
6725     m_Feat = *(CFeat_CI(sah));
6726     m_Loc = &m_Feat.GetLocation();
6727     x_SetObject(m_Feat.GetOriginalFeature());
6728 
6729     x_GatherInfo(ctx);
6730 }
6731 
6732 
x_FormatQual(ESourceQualifier slot,const CTempString & name,CFlatFeature::TQuals & qvec,IFlatQVal::TFlags flags) const6733 void CSourceFeatureItem::x_FormatQual
6734 (ESourceQualifier slot,
6735  const CTempString& name,
6736  CFlatFeature::TQuals& qvec,
6737  IFlatQVal::TFlags flags) const
6738 {
6739     TQCI it = m_Quals.LowerBound(slot);
6740     TQCI end = m_Quals.end();
6741     while (it != end  &&  it->first == slot) {
6742         const IFlatQVal* qual = it->second;
6743         qual->Format(qvec, name, *GetContext(),
6744                      flags | IFlatQVal::fIsSource);
6745         ++it;
6746     }
6747 }
6748 
6749 
Subtract(const CSourceFeatureItem & other,CScope & scope)6750 void CSourceFeatureItem::Subtract(const CSourceFeatureItem& other, CScope &scope)
6751 {
6752     m_Loc = Seq_loc_Subtract(GetLoc(), other.GetLoc(), CSeq_loc::fStrand_Ignore, &scope);
6753 }
6754 
6755 
SetLoc(const CSeq_loc & loc)6756 void CSourceFeatureItem::SetLoc(const CSeq_loc& loc)
6757 {
6758     m_Loc.Reset(&loc);
6759 }
6760 
6761 
6762 //  ----------------------------------------------------------------------------
x_GetGbValue(const string & key,string & value) const6763 bool CFeatureItem::x_GetGbValue(
6764     const string& key,
6765     string& value ) const
6766 //  ----------------------------------------------------------------------------
6767 {
6768     CSeq_feat::TQual gbQuals = m_Feat.GetQual();
6769     for ( CSeq_feat::TQual::iterator it = gbQuals.begin();
6770         it != gbQuals.end(); ++it )
6771     {
6772         //
6773         //  Idea:
6774         //  If a gbqual specifying the inference exists then bail out and let
6775         //  gbqual processing take care of this qualifier. If no such gbqual is
6776         //  present then add a default inference qualifier.
6777         //
6778         if (!(*it)->IsSetQual()  ||  !(*it)->IsSetVal()) {
6779             continue;
6780         }
6781         if ( (*it)->GetQual() == key ) {
6782             value = (*it)->GetVal();
6783             return true;
6784         }
6785     }
6786     return false;
6787 }
6788 
x_HasMethodtRNAscanSE(void) const6789 bool CFeatureItem::x_HasMethodtRNAscanSE(void) const
6790 {
6791     // try to make this fast, since it could be checked by every feature.
6792 
6793     // try to do cheap checks first
6794 
6795     if( ! m_Feat.IsSetExt() ) {
6796         return false;
6797     }
6798     const CUser_object & ext = m_Feat.GetExt();
6799     if( ! ext.IsSetType() || ! ext.IsSetData() ) {
6800         return false;
6801     }
6802     const CUser_object_Base::TType & ext_type = ext.GetType();
6803     if( ! ext_type.IsStr() || ext_type.GetStr() != "CombinedFeatureUserObjects" ) {
6804         return false;
6805     }
6806     const CUser_object::TData & ext_data = ext.GetData();
6807     ITERATE( CUser_object::TData, field_iter, ext_data ) {
6808         const CUser_field & field = **field_iter;
6809         if( ! field.IsSetLabel() || ! field.IsSetData()  ) {
6810             continue;
6811         }
6812         const CUser_field::TLabel & field_label = field.GetLabel();
6813         const CUser_field::TData & field_data = field.GetData();
6814         if( ! field_label.IsStr() || ! field_data.IsObject() ||
6815             field_label.GetStr() != "ModelEvidence" )
6816         {
6817             continue;
6818         }
6819         const CUser_object & evidence_object = field_data.GetObject();
6820         if( ! evidence_object.IsSetData() ||
6821             ! evidence_object.IsSetType() ||
6822             ! evidence_object.GetType().IsStr() ||
6823             evidence_object.GetType().GetStr() != "ModelEvidence" )
6824         {
6825             continue;
6826         }
6827         const CUser_object::TData & evidence_data = evidence_object.GetData();
6828         ITERATE( CUser_object::TData, evidence_iter, evidence_data ) {
6829             const CUser_field & evidence_field = **evidence_iter;
6830             if( ! evidence_field.IsSetLabel() ||
6831                 ! evidence_field.GetLabel().IsStr() ||
6832                 evidence_field.GetLabel().GetStr() != "Method" ||
6833                 ! evidence_field.IsSetData() ||
6834                 ! evidence_field.GetData().IsStr() ||
6835                 evidence_field.GetData().GetStr() != "tRNAscan-SE" )
6836             {
6837                 continue;
6838             }
6839             // we found proof of method tRNAscan-SE, so we return true
6840             return true;
6841         }
6842     }
6843 
6844     // didn't find any proof of method tRNAscan-SE
6845     return false;
6846 }
6847 
6848 END_SCOPE(objects)
6849 END_NCBI_SCOPE
6850 
6851