1 /*  $Id: unit_test_autodef.cpp 632623 2021-06-03 17:38:11Z ivanov $
2 * ===========================================================================
3 *
4 *                            PUBLIC DOMAIN NOTICE
5 *               National Center for Biotechnology Information
6 *
7 *  This software/database is a "United States Government Work" under the
8 *  terms of the United States Copyright Act.  It was written as part of
9 *  the author's official duties as a United States Government employee and
10 *  thus cannot be copyrighted.  This software/database is freely available
11 *  to the public for use. The National Library of Medicine and the U.S.
12 *  Government have not placed any restriction on its use or reproduction.
13 *
14 *  Although all reasonable efforts have been taken to ensure the accuracy
15 *  and reliability of the software and data, the NLM and the U.S.
16 *  Government do not and cannot warrant the performance or results that
17 *  may be obtained by using this software or data. The NLM and the U.S.
18 *  Government disclaim all warranties, express or implied, including
19 *  warranties of performance, merchantability or fitness for any particular
20 *  purpose.
21 *
22 *  Please cite the author in any work or product based on this material.
23 *
24 * ===========================================================================
25 *
26 * Author:  Colleen Bollin, NCBI
27 *
28 * File Description:
29 *   Unit tests for autodef (CAutoDef definition-line generation).
30 *
31 * ===========================================================================
32 */
33 
34 #include <ncbi_pch.hpp>
35 
36 #include "unit_test_autodef.hpp"
37 
38 #include <corelib/ncbi_system.hpp>
39 
40 // This macro should be defined before inclusion of test_boost.hpp in all
41 // "*.cpp" files inside executable except one. It is like function main() for
42 // non-Boost.Test executables is defined only in one *.cpp file - other files
43 // should not include it. If NCBI_BOOST_NO_AUTO_TEST_MAIN will not be defined
44 // then test_boost.hpp will define such "main()" function for tests.
45 //
46 // Usually if your unit tests contain only one *.cpp file you should not
47 // care about this macro at all.
48 //
49 //#define NCBI_BOOST_NO_AUTO_TEST_MAIN
50 
51 
52 // This header must be included before all Boost.Test headers if there are any
53 #include <corelib/test_boost.hpp>
54 
55 #include <objects/biblio/Id_pat.hpp>
56 #include <objects/biblio/Title.hpp>
57 #include <objects/general/Object_id.hpp>
58 #include <objects/general/Dbtag.hpp>
59 #include <objects/general/User_object.hpp>
60 #include <objects/medline/Medline_entry.hpp>
61 #include <objects/misc/sequence_macros.hpp>
62 #include <objects/pub/Pub_equiv.hpp>
63 #include <objects/pub/Pub.hpp>
64 #include <objects/seqset/Seq_entry.hpp>
65 #include <objects/seq/GIBB_mol.hpp>
66 #include <objects/seq/Seq_ext.hpp>
67 #include <objects/seq/Delta_ext.hpp>
68 #include <objects/seq/Delta_seq.hpp>
69 #include <objects/seq/Seq_literal.hpp>
70 #include <objects/seq/Ref_ext.hpp>
71 #include <objects/seq/Map_ext.hpp>
72 #include <objects/seq/Seg_ext.hpp>
73 #include <objects/seq/Seq_gap.hpp>
74 #include <objects/seq/Seq_data.hpp>
75 #include <objects/seq/Seq_descr.hpp>
76 #include <objects/seq/Seqdesc.hpp>
77 #include <objects/seq/MolInfo.hpp>
78 #include <objects/seq/Pubdesc.hpp>
79 #include <objects/seq/Seq_hist.hpp>
80 #include <objects/seq/Seq_hist_rec.hpp>
81 #include <objects/seqalign/Dense_seg.hpp>
82 #include <objects/seqblock/GB_block.hpp>
83 #include <objects/seqblock/EMBL_block.hpp>
84 #include <objects/seqfeat/BioSource.hpp>
85 #include <objects/seqfeat/Org_ref.hpp>
86 #include <objects/seqfeat/OrgName.hpp>
87 #include <objects/seqfeat/SubSource.hpp>
88 #include <objects/seqfeat/Imp_feat.hpp>
89 #include <objects/seqfeat/Cdregion.hpp>
90 #include <objects/seqfeat/RNA_ref.hpp>
91 #include <objects/seqfeat/Gb_qual.hpp>
92 #include <objects/seqloc/Seq_id.hpp>
93 #include <objects/seqloc/PDB_seq_id.hpp>
94 #include <objects/seqloc/Giimport_id.hpp>
95 #include <objects/seqloc/Patent_seq_id.hpp>
96 #include <objects/seqloc/Seq_loc.hpp>
97 #include <objects/seqloc/Seq_interval.hpp>
98 #include <objmgr/object_manager.hpp>
99 #include <objmgr/scope.hpp>
100 #include <objmgr/bioseq_ci.hpp>
101 #include <objmgr/feat_ci.hpp>
102 #include <objmgr/seq_vector.hpp>
103 #include <objmgr/util/sequence.hpp>
104 #include <objmgr/seqdesc_ci.hpp>
105 #include <objects/seq/seqport_util.hpp>
106 #include <objtools/data_loaders/genbank/gbloader.hpp>
107 #include <objtools/unit_test_util/unit_test_util.hpp>
108 #include <corelib/ncbiapp.hpp>
109 
110 #include <objtools/edit/autodef_with_tax.hpp>
111 
112 
113 // for writing out tmp files
114 #include <serial/objostrasn.hpp>
115 #include <serial/objostrasnb.hpp>
116 
117 
118 #include <common/test_assert.h>  /* This header must go last */
119 
120 
121 extern const char* sc_TestEntryCollidingLocusTags;
122 
123 BEGIN_NCBI_SCOPE
BEGIN_SCOPE(objects)124 BEGIN_SCOPE(objects)
125 
126 
127 
128 
129 
130 NCBITEST_INIT_TREE()
131 {
132     if ( !CNcbiApplication::Instance()->GetConfig().HasEntry("NCBI", "Data") ) {
133     }
134 }
135 
// Set to true by the auto-init hook when the "debug_mode" flag is given.
static bool s_debugMode(false);
137 
// Command-line setup hook: registers the flags this test executable accepts.
NCBITEST_INIT_CMDLINE(arg_desc)
{
    const string flag_name("debug_mode");
    const string flag_help("Debugging mode writes errors seen for each test");

    arg_desc->AddFlag(flag_name, flag_help);
}
146 
NCBITEST_AUTO_INIT()147 NCBITEST_AUTO_INIT()
148 {
149     // initialization function body
150 
151     const CArgs& args = CNcbiApplication::Instance()->GetArgs();
152     if (args["debug_mode"]) {
153         s_debugMode = true;
154     }
155 }
156 
157 
BuildSequence()158 static CRef<CSeq_entry> BuildSequence()
159 {
160     CRef<CSeq_entry> entry(new CSeq_entry());
161     entry->SetSeq().SetInst().SetMol(CSeq_inst::eMol_dna);
162     entry->SetSeq().SetInst().SetRepr(CSeq_inst::eRepr_raw);
163     entry->SetSeq().SetInst().SetSeq_data().SetIupacna().Set("AATTGGCCAAAATTGGCCAAAATTGGCCAAAATTGGCCAAAATTGGCCAAAATTGGCCAA");
164     entry->SetSeq().SetInst().SetLength(60);
165 
166     CRef<CSeq_id> id(new CSeq_id());
167     id->SetLocal().SetStr ("good");
168     entry->SetSeq().SetId().push_back(id);
169 
170     CRef<CSeqdesc> mdesc(new CSeqdesc());
171     mdesc->SetMolinfo().SetBiomol(CMolInfo::eBiomol_genomic);
172     entry->SetSeq().SetDescr().Set().push_back(mdesc);
173     return entry;
174 }
175 
176 
AddSource(CRef<CSeq_entry> entry,string taxname)177 static CRef<CSeqdesc> AddSource (CRef<CSeq_entry> entry, string taxname)
178 {
179     CRef<CSeqdesc> odesc(new CSeqdesc());
180     odesc->SetSource().SetOrg().SetTaxname(taxname);
181 
182     if (entry->IsSeq()) {
183         entry->SetSeq().SetDescr().Set().push_back(odesc);
184     } else if (entry->IsSet()) {
185         entry->SetSet().SetDescr().Set().push_back(odesc);
186     }
187     return odesc;
188 }
189 
190 
AddTitle(CRef<CSeq_entry> entry,string defline)191 static void AddTitle (CRef<CSeq_entry> entry, string defline)
192 {
193     CRef<CSeqdesc> odesc(new CSeqdesc());
194     odesc->SetTitle(defline);
195 
196     if (entry->IsSeq()) {
197         bool found = false;
198         if (entry->SetSeq().IsSetDescr()) {
199             NON_CONST_ITERATE(CBioseq::TDescr::Tdata, it, entry->SetSeq().SetDescr().Set()) {
200                 if ((*it)->IsTitle()) {
201                     (*it)->SetTitle(defline);
202                     found = true;
203                 }
204             }
205         }
206         if (!found) {
207             entry->SetSeq().SetDescr().Set().push_back(odesc);
208         }
209     } else if (entry->IsSet()) {
210         if (entry->GetSet().IsSetClass() && entry->GetSet().GetClass() == CBioseq_set::eClass_nuc_prot) {
211             AddTitle (entry->SetSet().SetSeq_set().front(), defline);
212         } else {
213             entry->SetSet().SetDescr().Set().push_back(odesc);
214         }
215     }
216 }
217 
218 
HasBoolField(const CUser_object & user,const string & field_name)219 size_t HasBoolField(const CUser_object& user, const string& field_name)
220 {
221     size_t num_found = 0;
222     ITERATE(CUser_object::TData, it, user.GetData()) {
223         if ((*it)->IsSetLabel() && (*it)->GetLabel().IsStr() &&
224             NStr::EqualNocase((*it)->GetLabel().GetStr(), field_name)) {
225             num_found++;
226             if (!(*it)->IsSetData()) {
227                 BOOST_CHECK_EQUAL("Data for " + field_name + "should be set", "Data not set");
228             } else {
229                 BOOST_CHECK_EQUAL((*it)->GetData().Which(), CUser_field::TData::e_Bool);
230                 if ((*it)->GetData().IsBool()) {
231                     BOOST_CHECK_EQUAL((*it)->GetData().GetBool(), true);
232                 }
233             }
234         }
235     }
236     return num_found;
237 }
238 
HasStringField(const CUser_object & user,const string & field_name,const string & value)239 size_t HasStringField(const CUser_object& user, const string& field_name, const string& value)
240 {
241     size_t num_found = 0;
242     ITERATE(CUser_object::TData, it, user.GetData()) {
243         if ((*it)->IsSetLabel() && (*it)->GetLabel().IsStr() &&
244             NStr::EqualNocase((*it)->GetLabel().GetStr(), field_name)) {
245             num_found++;
246             if (!(*it)->IsSetData()) {
247                 BOOST_CHECK_EQUAL("Data for " + field_name + "should be set", "Data not set");
248             } else {
249                 BOOST_CHECK_EQUAL((*it)->GetData().Which(), CUser_field::TData::e_Str);
250                 if ((*it)->GetData().IsStr()) {
251                     BOOST_CHECK_EQUAL((*it)->GetData().GetStr(), value);
252                 }
253             }
254         }
255     }
256     return num_found;
257 }
258 
HasIntField(const CUser_object & user,const string & field_name,int value)259 size_t HasIntField(const CUser_object& user, const string& field_name, int value)
260 {
261     size_t num_found = 0;
262     ITERATE(CUser_object::TData, it, user.GetData()) {
263         if ((*it)->IsSetLabel() && (*it)->GetLabel().IsStr() &&
264             NStr::EqualNocase((*it)->GetLabel().GetStr(), field_name)) {
265             num_found++;
266             if (!(*it)->IsSetData()) {
267                 BOOST_CHECK_EQUAL("Data for " + field_name + "should be set", "Data not set");
268             } else {
269                 BOOST_CHECK_EQUAL((*it)->GetData().Which(), CUser_field::TData::e_Int);
270                 if ((*it)->GetData().IsInt()) {
271                     BOOST_CHECK_EQUAL((*it)->GetData().GetInt(), value);
272                 }
273             }
274         }
275     }
276     return num_found;
277 }
278 
279 
CheckAutoDefOptions(const CUser_object & user,CAutoDefOptions & opts)280 void CheckAutoDefOptions
281 (const CUser_object& user,
282  CAutoDefOptions& opts)
283 {
284     size_t expected_num_fields = 7;
285     if (opts.GetOrgMods().size() > 0 || opts.GetSubSources().size() > 0) {
286         expected_num_fields++;
287     }
288     if (!opts.GetDoNotApplyToSp()) {
289         expected_num_fields--;
290     }
291     if (opts.GetUseLabels()) {
292         expected_num_fields++;
293     }
294     if (opts.GetAllowModAtEndOfTaxname()) {
295         expected_num_fields++;
296     }
297     if (opts.GetUseFakePromoters()) {
298         expected_num_fields ++;
299     }
300     if (opts.GetKeepRegulatoryFeatures()) {
301         expected_num_fields++;
302     }
303     if (opts.GetKeepIntrons()) {
304         expected_num_fields++;
305     }
306     if (opts.GetKeepExons()) {
307         expected_num_fields++;
308     }
309     if (opts.GetKeepuORFs()) {
310         expected_num_fields++;
311     }
312     if (opts.GetKeepMobileElements()) {
313         expected_num_fields++;
314     }
315     if (opts.AreAnyFeaturesSuppressed()) {
316         expected_num_fields++;
317     }
318     if (opts.GetKeepMiscRecomb()) {
319         expected_num_fields++;
320     }
321     if (opts.GetKeep5UTRs()) {
322         expected_num_fields++;
323     }
324     if (opts.GetKeep3UTRs()) {
325         expected_num_fields++;
326     }
327     if (opts.GetKeepRepeatRegion()) {
328         expected_num_fields++;
329     }
330     if (!NStr::IsBlank(opts.GetCustomFeatureClause())) {
331         expected_num_fields++;
332     }
333 
334     BOOST_CHECK_EQUAL(user.GetObjectType(), CUser_object::eObjectType_AutodefOptions);
335     BOOST_CHECK_EQUAL(user.GetData().size(), expected_num_fields);
336     BOOST_CHECK_EQUAL(HasBoolField(user, "LeaveParenthetical"), 1);
337     BOOST_CHECK_EQUAL(HasBoolField(user, "SpecifyNuclearProduct"), 1);
338     if (opts.GetUseLabels()) {
339         BOOST_CHECK_EQUAL(HasBoolField(user, "UseLabels"), 1);
340     }
341     if (opts.GetAllowModAtEndOfTaxname()) {
342         BOOST_CHECK_EQUAL(HasBoolField(user, "AllowModAtEndOfTaxname"), 1);
343     }
344     if (opts.GetDoNotApplyToSp()) {
345         BOOST_CHECK_EQUAL(HasBoolField(user, "DoNotApplyToSp"), 1);
346     }
347     if (opts.GetUseFakePromoters()) {
348         BOOST_CHECK_EQUAL(HasBoolField(user, "UseFakePromoters"), 1);
349     }
350     if (opts.GetKeepIntrons()) {
351         BOOST_CHECK_EQUAL(HasBoolField(user, "KeepIntrons"), 1);
352     }
353     if (opts.GetKeepExons()) {
354         BOOST_CHECK_EQUAL(HasBoolField(user, "KeepExons"), 1);
355     }
356     if (opts.GetKeepuORFs()) {
357         BOOST_CHECK_EQUAL(HasBoolField(user, "KeepuORFs"), 1);
358     }
359     BOOST_CHECK_EQUAL(HasStringField(user, "MiscFeatRule", opts.GetMiscFeatRule(opts.GetMiscFeatRule())) , 1);
360     BOOST_CHECK_EQUAL(HasStringField(user, "FeatureListType", opts.GetFeatureListType(opts.GetFeatureListType())), 1);
361     BOOST_CHECK_EQUAL(HasStringField(user, "HIVRule", "WantBoth"), 1);
362     if (!NStr::IsBlank(opts.GetCustomFeatureClause())) {
363         BOOST_CHECK_EQUAL(HasStringField(user, "CustomFeatureClause", opts.GetCustomFeatureClause()), 1);
364     }
365     BOOST_CHECK_EQUAL(HasIntField(user, "MaxMods", -99), 1);
366     if (user.GetData().size() != expected_num_fields) {
367         int field_num = 1;
368         ITERATE(CUser_object::TData, it, user.GetData()) {
369             if (!(*it)->IsSetLabel() || !(*it)->GetLabel().IsStr()) {
370                 BOOST_CHECK_EQUAL("Label should be set", "label not set for " + NStr::IntToString(field_num));
371             } else {
372                 printf("%s\n", (*it)->GetLabel().GetStr().c_str());
373             }
374         }
375     }
376 }
377 
378 
CheckDeflineMatches(CSeq_entry_Handle seh,objects::CAutoDefWithTaxonomy & autodef,CRef<CAutoDefModifierCombo> mod_combo)379 static void CheckDeflineMatches(CSeq_entry_Handle seh,
380                                 objects::CAutoDefWithTaxonomy& autodef,
381                                 CRef<CAutoDefModifierCombo> mod_combo)
382 {
383     // check defline for each nucleotide sequence
384     CBioseq_CI seq_iter(seh, CSeq_inst::eMol_na);
385     for ( ; seq_iter; ++seq_iter ) {
386        CBioseq_Handle bh (*seq_iter);
387        //Display ID of sequence
388        CConstRef<CSeq_id> id = bh.GetSeqId();
389 
390        // original defline
391        string orig_defline = "";
392        CSeqdesc_CI desc_it(bh, CSeqdesc::e_Title, 1);
393        if (desc_it) {
394            orig_defline = desc_it->GetTitle();
395        }
396 
397        string new_defline = autodef.GetOneDefLine(mod_combo, bh);
398 
399        BOOST_CHECK_EQUAL(orig_defline, new_defline);
400 
401        CRef<CUser_object> tmp_user = autodef.GetOptionsObject();
402        CAutoDefOptions opts;
403        opts.InitFromUserObject(*tmp_user);
404        mod_combo->InitOptions(opts);
405        CRef<CUser_object> user = opts.MakeUserObject();
406        CAutoDef autodef2;
407        autodef2.SetOptionsObject(*user);
408        new_defline = autodef2.GetOneDefLine(bh);
409        BOOST_CHECK_EQUAL(orig_defline, new_defline);
410        CheckAutoDefOptions(*user, opts);
411     }
412 
413     // check popset title if needed
414 
415     if (seh.IsSet() && seh.GetSet().GetCompleteBioseq_set()->NeedsDocsumTitle()) {
416         string orig_defline = "";
417         CSeqdesc_CI desc_it(seh, CSeqdesc::e_Title, 1);
418         if (desc_it) {
419             orig_defline = desc_it->GetTitle();
420         }
421         string new_defline = autodef.GetDocsumDefLine(seh);
422         BOOST_CHECK_EQUAL(orig_defline, new_defline);
423     }
424 }
425 
426 
CheckDeflineMatches(CRef<CSeq_entry> entry,vector<CSubSource::ESubtype> subsrcs,vector<COrgMod::ESubtype> orgmods)427 static void CheckDeflineMatches(CRef<CSeq_entry> entry,
428                                 vector<CSubSource::ESubtype> subsrcs,
429                                 vector<COrgMod::ESubtype> orgmods)
430 {
431     CRef<CObjectManager> object_manager = CObjectManager::GetInstance();
432 
433     CRef<CScope> scope(new CScope(*object_manager));
434     CSeq_entry_Handle seh = scope->AddTopLevelSeqEntry (*entry);
435 
436     objects::CAutoDefWithTaxonomy autodef;
437 
438     // add to autodef
439     autodef.AddSources (seh);
440 
441     CRef<CAutoDefModifierCombo> mod_combo;
442     mod_combo = new CAutoDefModifierCombo ();
443     mod_combo->SetUseModifierLabels(true);
444     ITERATE(vector<CSubSource::ESubtype>, it, subsrcs) {
445         mod_combo->AddSubsource(*it, true);
446     }
447     ITERATE(vector<COrgMod::ESubtype>, it, orgmods) {
448         mod_combo->AddOrgMod(*it, true);
449     }
450 
451     autodef.SetFeatureListType(CAutoDefOptions::eListAllFeatures);
452     autodef.SetMiscFeatRule(CAutoDefOptions::eDelete);
453 
454     CheckDeflineMatches(seh, autodef, mod_combo);
455 }
456 
457 
CheckDeflineMatches(CRef<CSeq_entry> entry,bool use_best=false,CAutoDefOptions::EFeatureListType list_type=CAutoDefOptions::eListAllFeatures,CAutoDefOptions::EMiscFeatRule misc_feat_rule=CAutoDefOptions::eNoncodingProductFeat)458 static void CheckDeflineMatches(CRef<CSeq_entry> entry, bool use_best = false,
459                                 CAutoDefOptions::EFeatureListType list_type = CAutoDefOptions::eListAllFeatures,
460                                 CAutoDefOptions::EMiscFeatRule misc_feat_rule = CAutoDefOptions::eNoncodingProductFeat)
461 {
462     CRef<CObjectManager> object_manager = CObjectManager::GetInstance();
463 
464     CRef<CScope> scope(new CScope(*object_manager));
465     CSeq_entry_Handle seh = scope->AddTopLevelSeqEntry (*entry);
466 
467     objects::CAutoDefWithTaxonomy autodef;
468 
469     // add to autodef
470     autodef.AddSources (seh);
471 
472     CRef<CAutoDefModifierCombo> mod_combo;
473     if (use_best) {
474         mod_combo = autodef.FindBestModifierCombo();
475     } else {
476         mod_combo = new CAutoDefModifierCombo ();
477     }
478 
479     autodef.SetFeatureListType(list_type);
480     autodef.SetMiscFeatRule(misc_feat_rule);
481 
482     CheckDeflineMatches(seh, autodef, mod_combo);
483 }
484 
485 
CheckDeflineMatches(CRef<CSeq_entry> entry,CSeqFeatData::ESubtype feat_to_suppress)486 static void CheckDeflineMatches(CRef<CSeq_entry> entry, CSeqFeatData::ESubtype feat_to_suppress)
487 {
488     CRef<CObjectManager> object_manager = CObjectManager::GetInstance();
489 
490     CRef<CScope> scope(new CScope(*object_manager));
491     CSeq_entry_Handle seh = scope->AddTopLevelSeqEntry(*entry);
492 
493     objects::CAutoDefWithTaxonomy autodef;
494 
495     // add to autodef
496     autodef.AddSources(seh);
497 
498     CRef<CAutoDefModifierCombo> mod_combo = autodef.FindBestModifierCombo();
499 
500     autodef.SuppressFeature(feat_to_suppress);
501 
502     autodef.SetFeatureListType(CAutoDefOptions::eListAllFeatures);
503     autodef.SetMiscFeatRule(CAutoDefOptions::eNoncodingProductFeat);
504 
505     CheckDeflineMatches(seh, autodef, mod_combo);
506 }
507 
508 
FindNucInSeqEntry(CRef<CSeq_entry> entry)509 CRef<CSeq_entry> FindNucInSeqEntry(CRef<CSeq_entry> entry)
510 {
511     CRef<CSeq_entry> empty(NULL);
512     if (!entry) {
513         return empty;
514     } else if (entry->IsSeq() && entry->GetSeq().IsNa()) {
515         return entry;
516     } else if (entry->IsSet()) {
517         ITERATE(CBioseq_set::TSeq_set, it, entry->GetSet().GetSeq_set()) {
518             CRef<CSeq_entry> rval = FindNucInSeqEntry(*it);
519             if (rval) {
520                 return rval;
521             }
522         }
523     }
524     return empty;
525 }
526 
527 
AddFeat(CRef<CSeq_feat> feat,CRef<CSeq_entry> entry)528 static void AddFeat (CRef<CSeq_feat> feat, CRef<CSeq_entry> entry)
529 {
530     CRef<CSeq_annot> annot;
531 
532     if (entry->IsSeq()) {
533         if (!entry->GetSeq().IsSetAnnot()
534             || !entry->GetSeq().GetAnnot().front()->IsFtable()) {
535             CRef<CSeq_annot> new_annot(new CSeq_annot());
536             entry->SetSeq().SetAnnot().push_back(new_annot);
537             annot = new_annot;
538         } else {
539             annot = entry->SetSeq().SetAnnot().front();
540         }
541     } else if (entry->IsSet()) {
542         if (!entry->GetSet().IsSetAnnot()
543             || !entry->GetSet().GetAnnot().front()->IsFtable()) {
544             CRef<CSeq_annot> new_annot(new CSeq_annot());
545             entry->SetSet().SetAnnot().push_back(new_annot);
546             annot = new_annot;
547         } else {
548             annot = entry->SetSet().SetAnnot().front();
549         }
550     }
551 
552     if (!feat->IsSetLocation() || feat->GetLocation().Which() == CSeq_loc::e_not_set) {
553         CRef<CSeq_entry> nuc_entry = FindNucInSeqEntry(entry);
554         if (nuc_entry) {
555             CRef<CSeq_id> id(new CSeq_id());
556             id->Assign(*(nuc_entry->GetSeq().GetId().front()));
557             feat->SetLocation().SetInt().SetId(*id);
558             feat->SetLocation().SetInt().SetFrom(0);
559             feat->SetLocation().SetInt().SetTo(entry->GetSeq().GetLength() - 1);
560         }
561     }
562 
563     annot->SetData().SetFtable().push_back(feat);
564 }
565 
566 
MakeProteinForNucProtSet(string id,string protein_name)567 static CRef<CSeq_entry> MakeProteinForNucProtSet (string id, string protein_name)
568 {
569     // make protein
570     CRef<CBioseq> pseq(new CBioseq());
571     pseq->SetInst().SetMol(CSeq_inst::eMol_aa);
572     pseq->SetInst().SetRepr(CSeq_inst::eRepr_raw);
573     pseq->SetInst().SetSeq_data().SetIupacaa().Set("MPRKTEIN");
574     pseq->SetInst().SetLength(8);
575 
576     CRef<CSeq_id> pid(new CSeq_id());
577     pid->SetLocal().SetStr (id);
578     pseq->SetId().push_back(pid);
579 
580     CRef<CSeqdesc> mpdesc(new CSeqdesc());
581     mpdesc->SetMolinfo().SetBiomol(CMolInfo::eBiomol_peptide);
582     pseq->SetDescr().Set().push_back(mpdesc);
583 
584     CRef<CSeq_entry> pentry(new CSeq_entry());
585     pentry->SetSeq(*pseq);
586 
587     CRef<CSeq_feat> feat (new CSeq_feat());
588     feat->SetData().SetProt().SetName().push_back(protein_name);
589     feat->SetLocation().SetInt().SetId().SetLocal().SetStr(id);
590     feat->SetLocation().SetInt().SetFrom(0);
591     feat->SetLocation().SetInt().SetTo(7);
592     AddFeat (feat, pentry);
593 
594     return pentry;
595 }
596 
597 
MakeCDSForNucProtSet(string nuc_id,string prot_id)598 static CRef<CSeq_feat> MakeCDSForNucProtSet (string nuc_id, string prot_id)
599 {
600     CRef<CSeq_feat> cds (new CSeq_feat());
601     cds->SetData().SetCdregion();
602     cds->SetProduct().SetWhole().SetLocal().SetStr(prot_id);
603     cds->SetLocation().SetInt().SetId().SetLocal().SetStr(nuc_id);
604     cds->SetLocation().SetInt().SetFrom(0);
605     cds->SetLocation().SetInt().SetTo(26);
606     return cds;
607 }
608 
609 
MakeGeneForNucProtSet(const string & nuc_id,const string & locus,const string & allele=kEmptyStr)610 static CRef<CSeq_feat> MakeGeneForNucProtSet(const string& nuc_id, const string& locus, const string& allele = kEmptyStr)
611 {
612     CRef<CSeq_feat> gene(new CSeq_feat());
613     gene->SetData().SetGene().SetLocus(locus);
614     if (!allele.empty()) {
615         gene->SetData().SetGene().SetAllele(allele);
616     }
617     gene->SetLocation().SetInt().SetId().SetLocal().SetStr(nuc_id);
618     gene->SetLocation().SetInt().SetFrom(0);
619     gene->SetLocation().SetInt().SetTo(26);
620     return gene;
621 }
622 
623 
BuildNucProtSet(const string & protein_name,const string & locus=kEmptyStr,const string & allele=kEmptyStr)624 static CRef<CSeq_entry> BuildNucProtSet(const string& protein_name, const string& locus = kEmptyStr, const string& allele = kEmptyStr)
625 {
626     CRef<CBioseq_set> set(new CBioseq_set());
627     set->SetClass(CBioseq_set::eClass_nuc_prot);
628 
629     // make nucleotide
630     CRef<CBioseq> nseq(new CBioseq());
631     nseq->SetInst().SetMol(CSeq_inst::eMol_dna);
632     nseq->SetInst().SetRepr(CSeq_inst::eRepr_raw);
633     nseq->SetInst().SetSeq_data().SetIupacna().Set("ATGCCCAGAAAAACAGAGATAAACTAAGGGATGCCCAGAAAAACAGAGATAAACTAAGGG");
634     nseq->SetInst().SetLength(60);
635 
636     CRef<CSeq_id> id(new CSeq_id());
637     id->SetLocal().SetStr ("nuc");
638     nseq->SetId().push_back(id);
639 
640     CRef<CSeqdesc> mdesc(new CSeqdesc());
641     mdesc->SetMolinfo().SetBiomol(CMolInfo::eBiomol_genomic);
642     nseq->SetDescr().Set().push_back(mdesc);
643 
644     CRef<CSeq_entry> nentry(new CSeq_entry());
645     nentry->SetSeq(*nseq);
646 
647     if (!locus.empty()) {
648         CRef<CSeq_feat> gene = MakeGeneForNucProtSet("nuc", locus, allele);
649         AddFeat(gene, nentry);
650     }
651 
652     set->SetSeq_set().push_back(nentry);
653 
654     // make protein
655     CRef<CSeq_entry> pentry = MakeProteinForNucProtSet("prot", protein_name);
656 
657     set->SetSeq_set().push_back(pentry);
658 
659     CRef<CSeq_entry> set_entry(new CSeq_entry());
660     set_entry->SetSet(*set);
661 
662     CRef<CSeq_feat> cds = MakeCDSForNucProtSet("nuc", "prot");
663     AddFeat (cds, set_entry);
664 
665     return set_entry;
666 }
667 
668 
669 // tests
670 
// A bare sequence with only a taxname gets "<taxname> sequence." as defline.
BOOST_AUTO_TEST_CASE(Test_SimpleAutodef)
{
    const string taxname("Homo sapiens");
    const string expected_title("Homo sapiens sequence.");

    CRef<CSeq_entry> seq_entry = BuildSequence();
    AddSource (seq_entry, taxname);
    AddTitle(seq_entry, expected_title);

    CheckDeflineMatches(seq_entry);
}
680 
681 
// A plasmid source whose plasmid-name is "unnamed": the expected defline says
// just "plasmid" without a name.
BOOST_AUTO_TEST_CASE(Test_UnnamedPlasmid)
{
    CRef<CSeq_entry> seq_entry = BuildSequence();

    CRef<CSeqdesc> src_desc = AddSource (seq_entry, "Alcanivorax sp. HA03");
    CBioSource& biosrc = src_desc->SetSource();
    biosrc.SetGenome(CBioSource::eGenome_plasmid);
    CRef<CSubSource> plasmid_name(new CSubSource("plasmid-name", "unnamed"));
    biosrc.SetSubtype().push_back(plasmid_name);

    AddTitle(seq_entry, "Alcanivorax sp. HA03 plasmid sequence.");

    CheckDeflineMatches(seq_entry);
}
694 
695 
// Nuc-prot set on an unnamed plasmid: the expected defline names the protein's
// gene with "complete cds" and mentions "plasmid" without a plasmid name.
BOOST_AUTO_TEST_CASE(Test_SQD_476)
{
    CRef<CSeq_entry> np_entry = BuildNucProtSet("chlorocatechol 1,2-dioxygenase");

    CRef<CSeqdesc> src_desc = AddSource (np_entry, "Alcanivorax sp. HA03");
    CBioSource& biosrc = src_desc->SetSource();
    biosrc.SetGenome(CBioSource::eGenome_plasmid);
    CRef<CSubSource> plasmid_name(new CSubSource("plasmid-name", "unnamed"));
    biosrc.SetSubtype().push_back(plasmid_name);

    AddTitle(np_entry, "Alcanivorax sp. HA03 plasmid chlorocatechol 1,2-dioxygenase gene, complete cds.");

    CheckDeflineMatches(np_entry);
}
707 
708 
// A repeat_region with satellite=microsatellite: the expected defline is
// "<taxname> microsatellite sequence.", both without and with a feature comment.
BOOST_AUTO_TEST_CASE(Test_SQD_630)
{
    CRef<CSeq_entry> seq_entry = BuildSequence();

    CRef<CSeqdesc> src_desc = AddSource (seq_entry, "Clathrina aurea");
    CRef<CSubSource> clone(new CSubSource("clone", "Cau_E6"));
    src_desc->SetSource().SetSubtype().push_back(clone);

    CRef<CSeq_feat> repeat(new CSeq_feat());
    repeat->SetData().SetImp().SetKey("repeat_region");
    CRef<CGb_qual> satellite(new CGb_qual("satellite", "microsatellite"));
    repeat->SetQual().push_back(satellite);
    AddFeat(repeat, seq_entry);

    AddTitle(seq_entry, "Clathrina aurea microsatellite sequence.");

    // First pass: no comment on the feature.
    CheckDeflineMatches(seq_entry);

    // Second pass: same expected title after a comment is added.
    repeat->SetComment("dinucleotide");
    CheckDeflineMatches(seq_entry);
}
728 
729 
// misc_feature whose comment says it contains 5S rRNA and a nontranscribed
// spacer: the expected defline is "<taxname> 5S ribosomal RNA gene region."
BOOST_AUTO_TEST_CASE(Test_SQD_169)
{
    CRef<CSeq_entry> seq_entry = BuildSequence();
    AddSource (seq_entry, "Clathrina aurea");

    CRef<CSeq_feat> misc_feat(new CSeq_feat());
    misc_feat->SetData().SetImp().SetKey("misc_feature");
    misc_feat->SetComment("contains 5S ribosomal RNA and nontranscribed spacer");
    AddFeat(misc_feat, seq_entry);

    AddTitle(seq_entry, "Clathrina aurea 5S ribosomal RNA gene region.");

    CheckDeflineMatches(seq_entry);
}
743 
744 
// misc_feature, partial at both ends, whose comment lists two genes and an
// intergenic spacer: the expected defline lists the genes as partial and the
// spacer as complete.
BOOST_AUTO_TEST_CASE(Test_SQD_374)
{
    CRef<CSeq_entry> seq_entry = BuildSequence();
    AddSource (seq_entry, "Clathrina aurea");

    CRef<CSeq_feat> misc_feat(new CSeq_feat());
    misc_feat->SetData().SetImp().SetKey("misc_feature");
    misc_feat->SetComment("contains DNA lyase (Apn2) gene, Apn2-Mat1 intergenic spacer, and mating type protein (Mat1) gene");
    AddFeat(misc_feat, seq_entry);

    // AddFeat supplied the location; now mark both ends partial.
    CSeq_loc& feat_loc = misc_feat->SetLocation();
    feat_loc.SetPartialStart(true, eExtreme_Biological);
    feat_loc.SetPartialStop(true, eExtreme_Biological);

    AddTitle(seq_entry, "Clathrina aurea DNA lyase (Apn2) gene, partial sequence; Apn2-Mat1 intergenic spacer, complete sequence; and mating type protein (Mat1) gene, partial sequence.");

    CheckDeflineMatches(seq_entry);
}
760 
761 
// misc_feature whose comment only describes amplification primers: the
// expected defline is the plain "<taxname> sequence."
BOOST_AUTO_TEST_CASE(Test_SQD_155)
{
    CRef<CSeq_entry> seq_entry = BuildSequence();
    AddSource (seq_entry, "Clathrina aurea");

    CRef<CSeq_feat> misc_feat(new CSeq_feat());
    misc_feat->SetData().SetImp().SetKey("misc_feature");
    misc_feat->SetComment("amplified with primers designed for 16S ribosomal RNA");
    AddFeat(misc_feat, seq_entry);

    AddTitle(seq_entry, "Clathrina aurea sequence.");

    CheckDeflineMatches(seq_entry);
}
775 
// Pop-set of three nuc-prot sets from the same organism that differ only by
// isolate.  Each member's defline includes its isolate; the expected set-level
// title omits the isolate.
BOOST_AUTO_TEST_CASE(Test_DocsumTitle_Popset)
{
    static const size_t kNumMembers = 3;
    // Id suffix applied via ChangeId; the first member keeps its default ids.
    static const char* const kIdSuffixes[kNumMembers] = { "", "2", "3" };
    static const char* const kIsolates[kNumMembers] = {
        "AcesapD07", "AcesapD12", "AcesapD33"
    };
    static const char* const kMemberTitles[kNumMembers] = {
        "Pinus cembra AcesapD07 fake protein name gene, complete cds.",
        "Pinus cembra AcesapD12 fake protein name gene, complete cds.",
        "Pinus cembra AcesapD33 fake protein name gene, complete cds."
    };

    CRef<CSeq_entry> members[kNumMembers];
    for (size_t i = 0;  i < kNumMembers;  ++i) {
        CRef<CSeq_entry> member = unit_test_util::BuildGoodNucProtSet();
        if (i > 0) {
            unit_test_util::ChangeId(member, string(kIdSuffixes[i]));
        }
        unit_test_util::SetTaxname(member, "Pinus cembra");
        // clear previous taxid before setting new one
        unit_test_util::SetTaxon(member, 0);
        unit_test_util::SetTaxon(member, 58041);
        unit_test_util::SetOrgMod(member, COrgMod::eSubtype_isolate, kIsolates[i]);
        string defline = kMemberTitles[i];
        AddTitle(unit_test_util::GetNucleotideSequenceFromGoodNucProtSet(member), defline);
        members[i] = member;
    }

    CRef<CSeq_entry> set(new CSeq_entry());
    set->SetSet().SetClass(CBioseq_set::eClass_pop_set);
    for (size_t i = 0;  i < kNumMembers;  ++i) {
        set->SetSet().SetSeq_set().push_back(members[i]);
    }
    string set_title = "Pinus cembra fake protein name gene, complete cds.";
    AddTitle(set, set_title);
    CheckDeflineMatches(set, true);
}
817 
818 
// Set title test with two nuc-prot members from different Bembidion species.
// Each member's nucleotide is titled with its own taxname; the set-level
// title starts with "Chilioperyphus".
// NOTE(review): the container class assigned below is eClass_pop_set even
// though the test is named "Physet" -- confirm that this is intended.
BOOST_AUTO_TEST_CASE(Test_DocsumTitle_Physet)
{
    // First member (its id is left as built).
    CRef<CSeq_entry> member1 = unit_test_util::BuildGoodNucProtSet();
    unit_test_util::SetTaxname(member1, "Bembidion mendocinum");
    unit_test_util::SetTaxon(member1, 0);      // wipe the existing taxid first
    unit_test_util::SetTaxon(member1, 1353850);
    AddTitle(unit_test_util::GetNucleotideSequenceFromGoodNucProtSet(member1),
             "Bembidion mendocinum fake protein name gene, complete cds.");

    // Second member, re-identified with "2".
    CRef<CSeq_entry> member2 = unit_test_util::BuildGoodNucProtSet();
    unit_test_util::ChangeId(member2, "2");
    unit_test_util::SetTaxname(member2, "Bembidion orregoi");
    unit_test_util::SetTaxon(member2, 0);      // wipe the existing taxid first
    unit_test_util::SetTaxon(member2, 1353851);
    AddTitle(unit_test_util::GetNucleotideSequenceFromGoodNucProtSet(member2),
             "Bembidion orregoi fake protein name gene, complete cds.");

    // Wrap both members in a set and give the set its own title.
    CRef<CSeq_entry> wrapper(new CSeq_entry());
    CBioseq_set& container = wrapper->SetSet();
    container.SetClass(CBioseq_set::eClass_pop_set);
    container.SetSeq_set().push_back(member1);
    container.SetSeq_set().push_back(member2);
    AddTitle(wrapper, "Chilioperyphus fake protein name gene, complete cds.");

    CheckDeflineMatches(wrapper, true);
}
846 
847 
// Three adjacent RNA features (5.8S rRNA, ITS2, 28S rRNA) on one sequence.
// The same expected title is checked twice: once with the spacer typed as
// miscRNA and once after retyping it as "other".
BOOST_AUTO_TEST_CASE(Test_GB_3108)
{
    CRef<CSeq_entry> entry = BuildSequence();
    CRef<CSeqdesc> source_desc = AddSource (entry, "Fusarium incarnatum");

    // 5.8S rRNA: interval end moved to 19, partial at the biological start.
    CRef<CSeq_feat> rrna_58s(new CSeq_feat());
    rrna_58s->SetData().SetRna().SetType(CRNA_ref::eType_rRNA);
    rrna_58s->SetData().SetRna().SetExt().SetName("5.8S ribosomal RNA");
    AddFeat(rrna_58s, entry);
    rrna_58s->SetLocation().SetInt().SetTo(19);
    rrna_58s->SetLocation().SetPartialStart(true, eExtreme_Biological);

    // Internal transcribed spacer 2: 20..39.
    CRef<CSeq_feat> its2(new CSeq_feat());
    its2->SetData().SetRna().SetType(CRNA_ref::eType_miscRNA);
    its2->SetData().SetRna().SetExt().SetName("internal transcribed spacer 2");
    AddFeat(its2, entry);
    its2->SetLocation().SetInt().SetFrom(20);
    its2->SetLocation().SetInt().SetTo(39);

    // 28S rRNA: 40..59, partial at the biological stop.
    CRef<CSeq_feat> rrna_28s(new CSeq_feat());
    rrna_28s->SetData().SetRna().SetType(CRNA_ref::eType_rRNA);
    rrna_28s->SetData().SetRna().SetExt().SetName("28S ribosomal RNA");
    AddFeat(rrna_28s, entry);
    rrna_28s->SetLocation().SetInt().SetFrom(40);
    rrna_28s->SetLocation().SetInt().SetTo(59);
    rrna_28s->SetLocation().SetPartialStop(true, eExtreme_Biological);

    AddTitle(entry, "Fusarium incarnatum 5.8S ribosomal RNA gene, partial sequence; internal transcribed spacer 2, complete sequence; and 28S ribosomal RNA gene, partial sequence.");
    CheckDeflineMatches(entry);

    // Retype the spacer and check again against the unchanged title.
    its2->SetData().SetRna().SetType(CRNA_ref::eType_other);
    CheckDeflineMatches(entry);
}
881 
882 
// Nuc-prot set whose taxname itself contains parentheses; the expected title
// carries the product name followed by the gene locus in parentheses.
BOOST_AUTO_TEST_CASE(Test_GB_3099)
{
    CRef<CSeq_entry> np_set = unit_test_util::BuildGoodNucProtSet();
    unit_test_util::SetTaxname(np_set, "Influenza A virus (A/USA/RVD1_H1/2011(H1N1))");

    CRef<CSeq_entry> nuc_seq = unit_test_util::GetNucleotideSequenceFromGoodNucProtSet(np_set);
    AddTitle(nuc_seq, "Influenza A virus (A/USA/RVD1_H1/2011(H1N1)) hemagglutinin (HA) gene, complete cds.");

    // Product name on the protein, plus a gene feature with locus "HA".
    unit_test_util::SetNucProtSetProductName(np_set, "hemagglutinin");
    CRef<CSeq_feat> ha_gene(new CSeq_feat());
    ha_gene->SetData().SetGene().SetLocus("HA");
    AddFeat(ha_gene, nuc_seq);

    CheckDeflineMatches(np_set, true);
}
897 
898 
// RNA sequence (biomol transcribed_RNA) carrying a single ncRNA feature with
// product "RmaA" and class "antisense_RNA".
BOOST_AUTO_TEST_CASE(Test_GB_3359)
{
    CRef<CSeq_entry> rna_seq = unit_test_util::BuildGoodSeq();
    unit_test_util::SetTaxname(rna_seq, "Erwinia amylovora");
    rna_seq->SetSeq().SetInst().SetMol(CSeq_inst::eMol_rna);
    unit_test_util::SetBiomol(rna_seq, CMolInfo::eBiomol_transcribed_RNA);

    // Turn a generic good feature into the ncRNA under test.
    CRef<CSeq_feat> antisense = unit_test_util::BuildGoodFeat ();
    CRNA_ref& rna_ref = antisense->SetData().SetRna();
    rna_ref.SetType(CRNA_ref::eType_ncRNA);
    rna_ref.SetExt().SetGen().SetProduct("RmaA");
    rna_ref.SetExt().SetGen().SetClass("antisense_RNA");
    unit_test_util::AddFeat (antisense, rna_seq);

    AddTitle(rna_seq, "Erwinia amylovora RmaA antisense RNA, complete sequence.");
    CheckDeflineMatches(rna_seq, true);
}
915 
916 
TestOneOrganelleSequenceDefline(CBioSource::TGenome genome,const string & defline)917 void TestOneOrganelleSequenceDefline(CBioSource::TGenome genome, const string& defline)
918 {
919     CRef<CSeq_entry> seq = unit_test_util::BuildGoodSeq();
920     unit_test_util::SetGenome(seq, genome);
921     AddTitle(seq, defline);
922     CheckDeflineMatches(seq, true, objects::CAutoDefOptions::eSequence);
923 }
924 
925 
// Runs the organelle sequence-defline check once per genome value, in the
// order listed in the table below.
BOOST_AUTO_TEST_CASE(Test_SQD_1733)
{
    // genome value -> title to attach for that genome
    static const struct {
        CBioSource::TGenome genome;
        const char*         title;
    } kOrganelleCases[] = {
        { CBioSource::eGenome_unknown,       "Sebaea microphylla genomic sequence." },
        { CBioSource::eGenome_mitochondrion, "Sebaea microphylla mitochondrion sequence." },
        { CBioSource::eGenome_apicoplast,    "Sebaea microphylla apicoplast sequence." },
        { CBioSource::eGenome_chloroplast,   "Sebaea microphylla chloroplast sequence." },
        { CBioSource::eGenome_kinetoplast,   "Sebaea microphylla kinetoplast sequence." },
        { CBioSource::eGenome_leucoplast,    "Sebaea microphylla leucoplast sequence." }
    };

    const size_t num_cases = sizeof(kOrganelleCases) / sizeof(kOrganelleCases[0]);
    for (size_t i = 0; i < num_cases; ++i) {
        TestOneOrganelleSequenceDefline(kOrganelleCases[i].genome, kOrganelleCases[i].title);
    }
}
936 
937 
AddExon(CRef<CSeq_entry> seq,const string & number,TSeqPos start)938 void AddExon(CRef<CSeq_entry> seq, const string& number, TSeqPos start)
939 {
940     CRef<CSeq_feat> exon = unit_test_util::AddGoodImpFeat(seq, "exon");
941     exon->ResetComment();
942     exon->SetLocation().SetInt().SetFrom(start);
943     exon->SetLocation().SetInt().SetTo(start + 5);
944     if (!NStr::IsBlank(number)) {
945         CRef<CGb_qual> qual(new CGb_qual());
946         qual->SetQual("number");
947         qual->SetVal(number);
948         exon->SetQual().push_back(qual);
949     }
950 }
951 
952 
// Partial CDS with numbered exons added one at a time.  After each addition
// the title is updated and re-checked: "exon 1", then "exons 1 and 2", then
// "exons 1 through 3".
BOOST_AUTO_TEST_CASE(Test_GB_3386)
{
    CRef<CSeq_entry> np_set = unit_test_util::BuildGoodNucProtSet();
    CRef<CSeq_entry> nuc_seq = unit_test_util::GetNucleotideSequenceFromGoodNucProtSet(np_set);
    CRef<CSeq_feat> coding = unit_test_util::GetCDSFromGoodNucProtSet (np_set);
    coding->SetLocation().SetPartialStop(true, eExtreme_Biological);

    // Exon 1 begins at the CDS start.
    AddExon(nuc_seq, "1", coding->GetLocation().GetStart(eExtreme_Positional));
    AddTitle(nuc_seq, "Sebaea microphylla fake protein name gene, exon 1 and partial cds.");
    CheckDeflineMatches(np_set, true);

    // Exon 2 begins 10 positions after the CDS start.
    AddExon(nuc_seq, "2", coding->GetLocation().GetStart(eExtreme_Positional) + 10);
    AddTitle(nuc_seq, "Sebaea microphylla fake protein name gene, exons 1 and 2 and partial cds.");
    CheckDeflineMatches(np_set, true);

    // Exon 3 begins 20 positions after the CDS start.
    AddExon(nuc_seq, "3", coding->GetLocation().GetStart(eExtreme_Positional) + 20);
    AddTitle(nuc_seq, "Sebaea microphylla fake protein name gene, exons 1 through 3 and partial cds.");
    CheckDeflineMatches(np_set, true);
}
975 
976 
// One miscRNA feature whose comment lists rRNA/spacer elements.  Checked in
// three states: complete, partial at both ends, and partial with an extra
// 18S element added at the front of the comment.
BOOST_AUTO_TEST_CASE(Test_GB_3410)
{
    CRef<CSeq_entry> entry = unit_test_util::BuildGoodSeq();
    CRef<objects::CSeq_feat> region = unit_test_util::AddMiscFeature(entry);
    region->SetData().SetRna().SetType(CRNA_ref::eType_miscRNA);

    // State 1: three elements, location left as built.
    region->SetComment("contains internal transcribed spacer 1, 5.8S ribosomal RNA, and internal transcribed spacer 2");
    AddTitle(entry, "Sebaea microphylla internal transcribed spacer 1, 5.8S ribosomal RNA gene, and internal transcribed spacer 2, complete sequence.");
    CheckDeflineMatches(entry, true);

    // State 2: same comment, feature now partial at both ends.
    region->SetLocation().SetPartialStart(true, eExtreme_Biological);
    region->SetLocation().SetPartialStop(true, eExtreme_Biological);
    AddTitle(entry, "Sebaea microphylla internal transcribed spacer 1, partial sequence; 5.8S ribosomal RNA gene, complete sequence; and internal transcribed spacer 2, partial sequence.");
    CheckDeflineMatches(entry, true);

    // State 3: still partial, comment now lists four elements.
    region->SetComment("contains 18S ribosomal RNA, internal transcribed spacer 1, 5.8S ribosomal RNA, and internal transcribed spacer 2");
    AddTitle(entry, "Sebaea microphylla 18S ribosomal RNA gene, partial sequence; internal transcribed spacer 1 and 5.8S ribosomal RNA gene, complete sequence; and internal transcribed spacer 2, partial sequence.");
    CheckDeflineMatches(entry, true);
}
996 
997 
// Single "D-loop" imp-feature, comment removed, on an otherwise default
// good sequence.
BOOST_AUTO_TEST_CASE(Test_GB_3395)
{
    CRef<CSeq_entry> entry = unit_test_util::BuildGoodSeq();

    CRef<objects::CSeq_feat> d_loop = unit_test_util::AddGoodImpFeat (entry, "D-loop");
    d_loop->ResetComment();

    AddTitle(entry, "Sebaea microphylla D-loop, complete sequence.");
    CheckDeflineMatches(entry, true);
}
1006 
1007 
// D-loop feature on a sequence whose taxname begins with a lowercase letter
// ("uncultured archaeon"); the expected title begins with "Uncultured".
BOOST_AUTO_TEST_CASE(Test_GB_3439)
{
    CRef<CSeq_entry> entry = unit_test_util::BuildGoodSeq();
    unit_test_util::SetTaxname(entry, "uncultured archaeon");

    CRef<objects::CSeq_feat> d_loop = unit_test_util::AddGoodImpFeat (entry, "D-loop");
    d_loop->ResetComment();

    AddTitle(entry, "Uncultured archaeon D-loop, complete sequence.");
    CheckDeflineMatches(entry, true);
}
1018 
1019 
// rRNA feature that has neither a product name nor a comment; the expected
// title has nothing between the taxname and "gene".
BOOST_AUTO_TEST_CASE(Test_GB_3488)
{
    CRef<CSeq_entry> entry = unit_test_util::BuildGoodSeq();
    unit_test_util::SetTaxname(entry, "Cypripedium japonicum");

    CRef<objects::CSeq_feat> unnamed_rrna = unit_test_util::AddMiscFeature(entry);
    unnamed_rrna->SetData().SetRna().SetType(CRNA_ref::eType_rRNA);
    unnamed_rrna->ResetComment();

    AddTitle(entry, "Cypripedium japonicum gene, complete sequence.");
    CheckDeflineMatches(entry, true);
}
1031 
// Two-step check: first a lone gene feature (locus "fr9A"), then the same
// sequence with an added misc_feature whose comment names a gene cluster.
BOOST_AUTO_TEST_CASE(Test_GB_3486)
{
    CRef<CSeq_entry> entry = unit_test_util::BuildGoodSeq();
    unit_test_util::SetTaxname(entry, "Burkholderia sp. FERM BP-3421");

    // Step 1: gene only.
    CRef<objects::CSeq_feat> fr9a_gene = unit_test_util::AddMiscFeature (entry);
    fr9a_gene->ResetComment();
    fr9a_gene->SetData().SetGene().SetLocus("fr9A");
    AddTitle(entry, "Burkholderia sp. FERM BP-3421 fr9A gene, complete sequence.");
    CheckDeflineMatches(entry, true);

    // Step 2: add the gene-cluster misc_feature and expect the cluster title.
    CRef<objects::CSeq_feat> cluster = unit_test_util::AddMiscFeature(entry);
    cluster->SetComment("spliceostatin/FR901464 biosynthetic gene cluster");
    AddTitle(entry, "Burkholderia sp. FERM BP-3421 spliceostatin/FR901464 biosynthetic gene cluster, complete sequence.");
    CheckDeflineMatches(entry, true);
}
1049 
1050 
// CDS with gene "matK" / protein "maturase K", plus an intron (partial at
// both ends, extended to the last base of the nucleotide) that has its own
// gene "trnK" described as "tRNA-Lys".
BOOST_AUTO_TEST_CASE(Test_GB_3496)
{
    CRef<CSeq_entry> np_set = unit_test_util::BuildGoodNucProtSet();

    // Gene for the existing coding region.
    CRef<CSeq_feat> coding = unit_test_util::GetCDSFromGoodNucProtSet (np_set);
    CRef<CSeq_feat> matk_gene = unit_test_util::MakeGeneForFeature (coding);
    matk_gene->SetData().SetGene().SetLocus("matK");
    CRef<CSeq_entry> nuc_seq = unit_test_util::GetNucleotideSequenceFromGoodNucProtSet (np_set);
    unit_test_util::AddFeat(matk_gene, nuc_seq);

    // Rename the protein product.
    CRef<CSeq_feat> prot_feat = unit_test_util::GetProtFeatFromGoodNucProtSet (np_set);
    prot_feat->SetData().SetProt().SetName().front() = "maturase K";

    // Intron running to the end of the nucleotide, partial on both sides.
    CRef<CSeq_feat> trnk_intron = unit_test_util::AddGoodImpFeat (nuc_seq, "intron");
    trnk_intron->SetLocation().SetInt().SetTo(nuc_seq->GetSeq().GetLength() - 1);
    trnk_intron->SetLocation().SetPartialStart(true, eExtreme_Biological);
    trnk_intron->SetLocation().SetPartialStop(true, eExtreme_Biological);
    trnk_intron->SetPartial(true);

    // Gene for the intron.
    CRef<CSeq_feat> trnk_gene = unit_test_util::MakeGeneForFeature (trnk_intron);
    trnk_gene->SetData().SetGene().SetLocus("trnK");
    trnk_gene->SetData().SetGene().SetDesc("tRNA-Lys");
    unit_test_util::AddFeat(trnk_gene, nuc_seq);

    AddTitle(nuc_seq, "Sebaea microphylla tRNA-Lys (trnK) gene, partial sequence; and maturase K (matK) gene, complete cds.");
    CheckDeflineMatches(np_set, true);
}
1076 
1077 
// Two coding regions that start at the same position, where the second one
// has a protein sequence but no protein feature (hence no protein name).
// Per the original author's note, this must still not be treated as
// alternative splicing.
BOOST_AUTO_TEST_CASE(Test_GB_3458)
{
    CRef<CSeq_entry> np_set = unit_test_util::BuildGoodNucProtSet();
    CRef<CSeq_entry> nuc_seq = unit_test_util::GetNucleotideSequenceFromGoodNucProtSet (np_set);

    // First coding region: the one that ships with the set, gene "M1".
    CRef<CSeq_feat> first_cds = unit_test_util::GetCDSFromGoodNucProtSet (np_set);
    CRef<CSeq_feat> m1_gene = unit_test_util::MakeGeneForFeature (first_cds);
    m1_gene->SetData().SetGene().SetLocus("M1");
    unit_test_util::AddFeat(m1_gene, nuc_seq);

    // Second coding region: same start, runs to the last base, gene "M2".
    CRef<CSeq_feat> second_cds = unit_test_util::AddMiscFeature(nuc_seq);
    second_cds->SetData().SetCdregion();
    second_cds->ResetComment();
    second_cds->SetLocation().SetInt().SetFrom(first_cds->GetLocation().GetStart(eExtreme_Positional));
    second_cds->SetLocation().SetInt().SetTo(nuc_seq->GetSeq().GetInst().GetLength() - 1);
    CRef<CSeq_feat> m2_gene = unit_test_util::MakeGeneForFeature (second_cds);
    m2_gene->SetData().SetGene().SetLocus("M2");
    unit_test_util::AddFeat(m2_gene, nuc_seq);

    // Bare protein sequence "prot2" for the second coding region; no protein
    // feature is attached to it.
    CRef<CSeq_entry> bare_prot(new CSeq_entry());
    objects::CSeq_inst& prot_inst = bare_prot->SetSeq().SetInst();
    prot_inst.SetMol(objects::CSeq_inst::eMol_aa);
    prot_inst.SetRepr(objects::CSeq_inst::eRepr_raw);
    prot_inst.SetSeq_data().SetIupacaa().Set("MPRKTEIN");
    prot_inst.SetLength(8);

    CRef<objects::CSeq_id> prot_id(new objects::CSeq_id());
    prot_id->SetLocal().SetStr ("prot2");
    bare_prot->SetSeq().SetId().push_back(prot_id);
    np_set->SetSet().SetSeq_set().push_back(bare_prot);
    second_cds->SetProduct().SetWhole().SetLocal().SetStr("prot2");

    AddTitle(nuc_seq, "Sebaea microphylla M2 and fake protein name (M1) genes, complete cds.");
    CheckDeflineMatches(np_set, true);
}
1111 
1112 
// Coding region with gene "M1" inside a mobile_element feature that spans the
// whole nucleotide and is qualified as "integron:class I"; the expected title
// mentions "class I integron" ahead of the gene.
BOOST_AUTO_TEST_CASE(Test_GB_3679)
{
    CRef<CSeq_entry> np_set = unit_test_util::BuildGoodNucProtSet();
    CRef<CSeq_entry> nuc_seq = unit_test_util::GetNucleotideSequenceFromGoodNucProtSet (np_set);

    // Gene for the coding region that ships with the set.
    CRef<CSeq_feat> coding = unit_test_util::GetCDSFromGoodNucProtSet (np_set);
    CRef<CSeq_feat> m1_gene = unit_test_util::MakeGeneForFeature (coding);
    m1_gene->SetData().SetGene().SetLocus("M1");
    unit_test_util::AddFeat(m1_gene, nuc_seq);

    // Mobile element covering positions 0 .. length-1.
    CRef<CSeq_feat> mobile = unit_test_util::AddMiscFeature(nuc_seq);
    mobile->SetData().SetImp().SetKey("mobile_element");
    CRef<CGb_qual> type_qual(new CGb_qual());
    type_qual->SetQual("mobile_element_type");
    type_qual->SetVal("integron:class I");
    mobile->SetQual().push_back(type_qual);
    mobile->SetLocation().SetInt().SetFrom(0);
    mobile->SetLocation().SetInt().SetTo(nuc_seq->GetSeq().GetLength() - 1);

    AddTitle(nuc_seq, "Sebaea microphylla class I integron fake protein name (M1) gene, complete cds.");
    CheckDeflineMatches(np_set, true);
}
1136 
1137 
// Gene with locus "gltB" and allele "16"; the expected title includes the
// combined "gltB-16 allele" phrase.
BOOST_AUTO_TEST_CASE(Test_GB_3848)
{
    CRef<CSeq_entry> np_set = unit_test_util::BuildGoodNucProtSet();
    CRef<CSeq_entry> nuc_seq = unit_test_util::GetNucleotideSequenceFromGoodNucProtSet (np_set);
    CRef<CSeq_feat> coding = unit_test_util::GetCDSFromGoodNucProtSet (np_set);

    CRef<CSeq_feat> allele_gene = unit_test_util::MakeGeneForFeature (coding);
    allele_gene->SetData().SetGene().SetLocus("gltB");
    allele_gene->SetData().SetGene().SetAllele("16");
    unit_test_util::AddFeat(allele_gene, nuc_seq);

    AddTitle(nuc_seq, "Sebaea microphylla fake protein name (gltB) gene, gltB-16 allele, complete cds.");
    CheckDeflineMatches(np_set, true);
}
1151 
1152 
// misc_feature, partial at both ends, whose comment lists a tRNA and a
// control region.
BOOST_AUTO_TEST_CASE(Test_SQD_2075)
{
    CRef<CSeq_entry> entry = unit_test_util::BuildGoodSeq();

    CRef<objects::CSeq_feat> combo_feat = unit_test_util::AddMiscFeature(entry);
    combo_feat->SetComment("contains tRNA-Pro and control region");
    combo_feat->SetLocation().SetPartialStart(true, eExtreme_Biological);
    combo_feat->SetLocation().SetPartialStop(true, eExtreme_Biological);

    AddTitle(entry, "Sebaea microphylla tRNA-Pro gene and control region, partial sequence.");
    CheckDeflineMatches(entry, true);
}
1163 
1164 
// Regulatory feature of class "promoter": checked first on its own, then
// again after a gene (locus "chs") built from it is added.
BOOST_AUTO_TEST_CASE(Test_SQD_2115)
{
    CRef<CSeq_entry> entry = unit_test_util::BuildGoodSeq();

    // Promoter with no comment.
    CRef<objects::CSeq_feat> promoter_feat = unit_test_util::AddMiscFeature(entry);
    promoter_feat->ResetComment();
    promoter_feat->SetData().SetImp().SetKey("regulatory");
    CRef<CGb_qual> class_qual(new CGb_qual());
    class_qual->SetQual("regulatory_class");
    class_qual->SetVal("promoter");
    promoter_feat->SetQual().push_back(class_qual);

    AddTitle(entry, "Sebaea microphylla promoter region.");
    CheckDeflineMatches(entry, true);

    // Same promoter, now with a gene made from it.
    CRef<CSeq_feat> chs_gene = unit_test_util::MakeGeneForFeature (promoter_feat);
    chs_gene->SetData().SetGene().SetLocus("chs");
    unit_test_util::AddFeat(chs_gene, entry);

    AddTitle(entry, "Sebaea microphylla chs gene, promoter region.");
    CheckDeflineMatches(entry, true);
}
1186 
1187 
// Three consecutive RNA features: a miscRNA listing 18S + ITS1 (0..15, partial
// start), a 5.8S rRNA (16..19), and a miscRNA listing ITS2 + 28S (20..35,
// partial stop).
BOOST_AUTO_TEST_CASE(Test_GB_3866)
{
    CRef<CSeq_entry> entry = unit_test_util::BuildGoodSeq();

    // Upstream block: 18S rRNA and ITS1.
    CRef<objects::CSeq_feat> upstream = unit_test_util::AddMiscFeature(entry);
    upstream->SetData().SetRna().SetType(CRNA_ref::eType_miscRNA);
    upstream->SetComment("contains 18S ribosomal RNA and internal transcribed spacer 1");
    upstream->SetLocation().SetInt().SetFrom(0);
    upstream->SetLocation().SetInt().SetTo(15);
    upstream->SetLocation().SetPartialStart(true, eExtreme_Biological);

    // Middle block: 5.8S rRNA.
    CRef<objects::CSeq_feat> middle = unit_test_util::AddMiscFeature(entry);
    middle->SetData().SetRna().SetType(CRNA_ref::eType_rRNA);
    middle->SetData().SetRna().SetExt().SetName("5.8S ribosomal RNA");
    middle->SetLocation().SetInt().SetFrom(16);
    middle->SetLocation().SetInt().SetTo(19);

    // Downstream block: ITS2 and 28S rRNA.
    CRef<objects::CSeq_feat> downstream = unit_test_util::AddMiscFeature(entry);
    downstream->SetData().SetRna().SetType(CRNA_ref::eType_miscRNA);
    downstream->SetComment("contains internal transcribed spacer 2 and 28S ribosomal RNA");
    downstream->SetLocation().SetInt().SetFrom(20);
    downstream->SetLocation().SetInt().SetTo(35);
    downstream->SetLocation().SetPartialStop(true, eExtreme_Biological);

    // Adjacent literals are concatenated by the compiler into one string.
    AddTitle(entry,
             "Sebaea microphylla 18S ribosomal RNA gene, partial "
             "sequence; internal transcribed spacer 1, 5.8S ribosomal RNA gene, and "
             "internal transcribed spacer 2, complete sequence; and 28S ribosomal RNA "
             "gene, partial sequence.");
    CheckDeflineMatches(entry, true);
}
1217 
1218 
// misc_feature (0..15, partial at both ends) whose comment lists two tRNAs
// and a control region; the expected title marks only the middle element as
// complete.
BOOST_AUTO_TEST_CASE(Test_SQD_2118)
{
    CRef<CSeq_entry> entry = unit_test_util::BuildGoodSeq();

    CRef<objects::CSeq_feat> combo_feat = unit_test_util::AddMiscFeature(entry);
    combo_feat->SetComment("contains tRNA-Thr, tRNA-Pro, and control region");
    combo_feat->SetLocation().SetInt().SetFrom(0);
    combo_feat->SetLocation().SetInt().SetTo(15);
    combo_feat->SetLocation().SetPartialStart(true, eExtreme_Biological);
    combo_feat->SetLocation().SetPartialStop(true, eExtreme_Biological);

    // Adjacent literals are concatenated by the compiler into one string.
    AddTitle(entry,
             "Sebaea microphylla tRNA-Thr gene, partial sequence; "
             "tRNA-Pro gene, complete sequence; and control region, partial sequence.");
    CheckDeflineMatches(entry, true);
}
1234 
1235 
// A lone misc_feature (0..15, partial at both ends) checked under three
// misc-feature rules (eDelete, eNoncodingProductFeat, eCommentFeat), then
// once more with a "similar to" comment.
BOOST_AUTO_TEST_CASE(Test_GB_1851)
{
    CRef<CSeq_entry> entry = unit_test_util::BuildGoodSeq();

    CRef<objects::CSeq_feat> lone_misc = unit_test_util::AddMiscFeature(entry);
    lone_misc->SetComment("nonfunctional xyz due to argle");
    lone_misc->SetLocation().SetInt().SetFrom(0);
    lone_misc->SetLocation().SetInt().SetTo(15);
    lone_misc->SetLocation().SetPartialStart(true, eExtreme_Biological);
    lone_misc->SetLocation().SetPartialStop(true, eExtreme_Biological);

    // eDelete: per the original author's note, the feature is kept because
    // it is the only misc_feature present.
    AddTitle(entry, "Sebaea microphylla nonfunctional xyz gene, partial sequence.");
    CheckDeflineMatches(entry, true, CAutoDefOptions::eListAllFeatures, CAutoDefOptions::eDelete);

    // eNoncodingProductFeat: same title expected.
    AddTitle(entry, "Sebaea microphylla nonfunctional xyz gene, partial sequence.");
    CheckDeflineMatches(entry, true, CAutoDefOptions::eListAllFeatures, CAutoDefOptions::eNoncodingProductFeat);

    // eCommentFeat: the full comment text appears in the title.
    AddTitle(entry, "Sebaea microphylla nonfunctional xyz due to argle genomic sequence.");
    CheckDeflineMatches(entry, true, CAutoDefOptions::eListAllFeatures, CAutoDefOptions::eCommentFeat);

    // "similar to xyz" is expected to come out as "xyz-like".
    lone_misc->SetComment("similar to xyz");
    AddTitle(entry, "Sebaea microphylla xyz-like gene, partial sequence.");
    CheckDeflineMatches(entry, true, CAutoDefOptions::eListAllFeatures, CAutoDefOptions::eNoncodingProductFeat);
}
1260 
1261 
s_SetProteinName(CRef<CSeq_entry> prot,const string & name)1262 void s_SetProteinName(CRef<CSeq_entry> prot, const string& name)
1263 {
1264     prot->SetSeq().SetAnnot().front()->SetData().SetFtable().front()->SetData().SetProt().SetName().front() = name;
1265 }
1266 
1267 
s_AddCDS(CRef<CSeq_entry> np,const string & name,TSeqPos from,TSeqPos to)1268 CRef<CSeq_feat> s_AddCDS(CRef<CSeq_entry> np, const string& name, TSeqPos from, TSeqPos to)
1269 {
1270     CRef<CSeq_entry> prev_prot = np->SetSet().SetSeq_set().back();
1271     CRef<CSeq_entry> new_prot (new CSeq_entry());
1272     new_prot->Assign(*prev_prot);
1273     CRef<CSeq_id> new_id(new CSeq_id());
1274     new_id->Assign(*(prev_prot->GetSeq().GetId().front()));
1275     size_t pos = NStr::Find(new_id->GetLocal().GetStr(), "_");
1276     string prefix = new_id->GetLocal().GetStr().substr(0, pos+ 1);
1277     string suffix = new_id->GetLocal().GetStr().substr(pos + 1);
1278     int prev_offset = NStr::StringToInt(suffix);
1279     new_id->SetLocal().SetStr(prefix + NStr::NumericToString(prev_offset + 1));
1280     unit_test_util::ChangeId(new_prot, new_id);
1281     s_SetProteinName(new_prot, name);
1282     np->SetSet().SetSeq_set().push_back(new_prot);
1283 
1284     CRef<CSeq_feat> prev_cds = np->SetSet().SetAnnot().front()->SetData().SetFtable().back();
1285     CRef<CSeq_feat> new_cds(new CSeq_feat());
1286     new_cds->Assign(*prev_cds);
1287     new_cds->SetProduct().SetWhole().Assign(*new_id);
1288     new_cds->SetLocation().SetInt().SetFrom(from);
1289     new_cds->SetLocation().SetInt().SetTo(to);
1290     np->SetSet().SetAnnot().front()->SetData().SetFtable().push_back(new_cds);
1291     return new_cds;
1292 }
1293 
1294 
// Three coding regions.  First checked with plain interval locations (the
// two added ones overlap), then with the two added ones converted to mix
// locations that differ only in the end of their last interval, where the
// expected title reports alternative splicing.
BOOST_AUTO_TEST_CASE(Test_GB_3942)
{
    CRef<CSeq_entry> np_set = unit_test_util::BuildGoodNucProtSet();
    CRef<CSeq_entry> nuc_seq = unit_test_util::GetNucleotideSequenceFromGoodNucProtSet (np_set);
    CRef<CSeq_entry> polymerase_prot = unit_test_util::GetProteinSequenceFromGoodNucProtSet(np_set);
    CRef<CSeq_feat> polymerase_cds = unit_test_util::GetCDSFromGoodNucProtSet (np_set);

    // Re-id the first protein with "_1" (s_AddCDS derives later ids from it)
    // and shrink its CDS to 0..5.
    unit_test_util::ChangeId(polymerase_prot, "_1");
    polymerase_cds->SetLocation().SetInt().SetFrom(0);
    polymerase_cds->SetLocation().SetInt().SetTo(5);
    polymerase_cds->SetProduct().SetWhole().Assign(*(polymerase_prot->GetSeq().GetId().front()));
    s_SetProteinName(polymerase_prot, "RNA-dependent RNA polymerase");

    CRef<CSeq_feat> coat_cds = s_AddCDS(np_set, "Coat protein", 10, 25);
    CRef<CSeq_feat> movement_cds = s_AddCDS(np_set, "Movement protein", 12, 20);

    polymerase_cds->SetLocation().SetPartialStart(true, eExtreme_Biological);

    AddTitle(nuc_seq, "Sebaea microphylla RNA-dependent RNA polymerase gene, partial cds; and Coat protein and Movement protein genes, complete cds.");
    CheckDeflineMatches(np_set, true);

    // Actual splicing: both added CDSs get the same mix location, then the
    // second one's final interval is stretched by two.
    coat_cds->SetLocation().Assign(*(unit_test_util::MakeMixLoc(nuc_seq->GetSeq().GetId().front())));
    movement_cds->SetLocation().Assign(coat_cds->GetLocation());
    TSeqPos last_stop = movement_cds->GetLocation().GetMix().Get().back()->GetInt().GetTo();
    movement_cds->SetLocation().SetMix().Set().back()->SetInt().SetTo(last_stop + 2);

    AddTitle(nuc_seq, "Sebaea microphylla protein gene, complete cds, alternatively spliced; and RNA-dependent RNA polymerase gene, partial cds.");
    CheckDeflineMatches(np_set, true);
}
1327 
1328 
// Same three-CDS layout with mix locations as the spliced half of
// Test_GB_3942, but with the source division set to "VRL"; here the expected
// title lists the two proteins by name and does not mention alternative
// splicing.
BOOST_AUTO_TEST_CASE(Test_GB_8927)
{
    CRef<CSeq_entry> np_set = unit_test_util::BuildGoodNucProtSet();
    CRef<CSeq_entry> nuc_seq = unit_test_util::GetNucleotideSequenceFromGoodNucProtSet(np_set);
    CRef<CSeq_entry> polymerase_prot = unit_test_util::GetProteinSequenceFromGoodNucProtSet(np_set);
    CRef<CSeq_feat> polymerase_cds = unit_test_util::GetCDSFromGoodNucProtSet(np_set);

    // Re-id the first protein with "_1" (s_AddCDS derives later ids from it)
    // and shrink its CDS to 0..5.
    unit_test_util::ChangeId(polymerase_prot, "_1");
    polymerase_cds->SetLocation().SetInt().SetFrom(0);
    polymerase_cds->SetLocation().SetInt().SetTo(5);
    polymerase_cds->SetProduct().SetWhole().Assign(*(polymerase_prot->GetSeq().GetId().front()));
    s_SetProteinName(polymerase_prot, "RNA-dependent RNA polymerase");

    CRef<CSeq_feat> coat_cds = s_AddCDS(np_set, "Coat protein", 10, 25);
    CRef<CSeq_feat> movement_cds = s_AddCDS(np_set, "Movement protein", 12, 20);

    polymerase_cds->SetLocation().SetPartialStart(true, eExtreme_Biological);

    // Both added CDSs get the same mix location, then the second one's final
    // interval is stretched by two.
    coat_cds->SetLocation().Assign(*(unit_test_util::MakeMixLoc(nuc_seq->GetSeq().GetId().front())));
    movement_cds->SetLocation().Assign(coat_cds->GetLocation());
    TSeqPos last_stop = movement_cds->GetLocation().GetMix().Get().back()->GetInt().GetTo();
    movement_cds->SetLocation().SetMix().Set().back()->SetInt().SetTo(last_stop + 2);

    unit_test_util::SetDiv(np_set, "VRL");

    AddTitle(nuc_seq, "Sebaea microphylla Movement protein and Coat protein genes, complete cds; and RNA-dependent RNA polymerase gene, partial cds.");
    CheckDeflineMatches(np_set, true);
}
1356 
1357 
// Partial 28S rRNA on a source that has both an isolate (org-mod) and a
// clone (sub-source); both modifiers are requested and both are expected in
// the title.
BOOST_AUTO_TEST_CASE(Test_GB_3926)
{
    CRef<CSeq_entry> entry = unit_test_util::BuildGoodSeq();

    CRef<objects::CSeq_feat> rrna_28s = unit_test_util::AddMiscFeature(entry);
    rrna_28s->ResetComment();
    rrna_28s->SetData().SetRna().SetType(CRNA_ref::eType_rRNA);
    rrna_28s->SetData().SetRna().SetExt().SetName("28S ribosomal RNA");
    rrna_28s->SetLocation().SetPartialStart(true, eExtreme_Biological);
    rrna_28s->SetLocation().SetPartialStop(true, eExtreme_Biological);

    unit_test_util::SetOrgMod(entry, COrgMod::eSubtype_isolate, "JU6");
    unit_test_util::SetSubSource(entry, CSubSource::eSubtype_clone, "1");

    AddTitle(entry, "Sebaea microphylla isolate JU6 clone 1 28S ribosomal RNA gene, partial sequence.");

    // One requested modifier of each kind.
    vector<CSubSource::ESubtype> subsrcs(1, CSubSource::eSubtype_clone);
    vector<COrgMod::ESubtype> orgmods(1, COrgMod::eSubtype_isolate);

    CheckDeflineMatches(entry, subsrcs, orgmods);
}
1379 
1380 
// Coding region plus a regulatory feature of class "promoter".  The autodef
// object is configured by hand (list all features, misc-feature rule eDelete,
// fake promoters on) and checked with a default-constructed modifier combo.
BOOST_AUTO_TEST_CASE(Test_SQD_2181)
{
    CRef<CSeq_entry> np_set = unit_test_util::BuildGoodNucProtSet();
    CRef<CSeq_entry> nuc_seq = unit_test_util::GetNucleotideSequenceFromGoodNucProtSet (np_set);
    CRef<CSeq_feat> coding = unit_test_util::GetCDSFromGoodNucProtSet (np_set);

    // Promoter with no comment.
    CRef<objects::CSeq_feat> promoter_feat = unit_test_util::AddMiscFeature(nuc_seq);
    promoter_feat->ResetComment();
    promoter_feat->SetData().SetImp().SetKey("regulatory");
    CRef<CGb_qual> class_qual(new CGb_qual());
    class_qual->SetQual("regulatory_class");
    class_qual->SetVal("promoter");
    promoter_feat->SetQual().push_back(class_qual);

    AddTitle(nuc_seq, "Sebaea microphylla fake protein name gene, promoter region and complete cds.");

    // Put the entry into a scope so that a handle is available.
    CRef<CObjectManager> obj_mgr = CObjectManager::GetInstance();
    CRef<CScope> test_scope(new CScope(*obj_mgr));
    CSeq_entry_Handle entry_handle = test_scope->AddTopLevelSeqEntry (*np_set);

    // Feed the sources to autodef, then apply the options under test.
    objects::CAutoDefWithTaxonomy autodef;
    autodef.AddSources (entry_handle);

    CRef<CAutoDefModifierCombo> mod_combo(new CAutoDefModifierCombo ());

    autodef.SetFeatureListType(CAutoDefOptions::eListAllFeatures);
    autodef.SetMiscFeatRule(CAutoDefOptions::eDelete);
    autodef.SetUseFakePromoters(true);

    CheckDeflineMatches(entry_handle, autodef, mod_combo);
}
1415 
1416 
// Source with a culture_collection org-mod "ATCC:12345"; only that modifier
// is requested, and the expected title shows it as "culture ATCC:12345".
BOOST_AUTO_TEST_CASE(Test_GB_3949)
{
    CRef<CSeq_entry> np_set = unit_test_util::BuildGoodNucProtSet();
    unit_test_util::SetOrgMod(np_set, COrgMod::eSubtype_culture_collection, "ATCC:12345");

    CRef<CSeq_entry> nuc_seq = unit_test_util::GetNucleotideSequenceFromGoodNucProtSet (np_set);
    AddTitle(nuc_seq, "Sebaea microphylla culture ATCC:12345 fake protein name gene, complete cds.");

    // No sub-source modifiers; a single org-mod modifier.
    vector<CSubSource::ESubtype> subsrcs;
    vector<COrgMod::ESubtype> orgmods(1, COrgMod::eSubtype_culture_collection);

    CheckDeflineMatches(np_set, subsrcs, orgmods);
}
1431 
// Partial CDS starting at 20, preceded by a numbered intron (0..19) and
// covered by a gene "GAPDH".  Autodef is run with SetKeepIntrons(true) and
// the modifier combo returned by FindBestModifierCombo().
BOOST_AUTO_TEST_CASE(Test_GB_4043)
{
    CRef<CSeq_entry> np_set = unit_test_util::BuildGoodNucProtSet();
    CRef<CSeq_feat> coding = unit_test_util::GetCDSFromGoodNucProtSet (np_set);
    CRef<CSeq_entry> nuc_seq = unit_test_util::GetNucleotideSequenceFromGoodNucProtSet (np_set);

    // CDS now starts at 20 and is partial at its biological start.
    coding->SetLocation().SetInt().SetFrom(20);
    coding->SetLocation().SetPartialStart(true, eExtreme_Biological);

    // Intron 2: 0..19, partial start, no comment.
    CRef<objects::CSeq_feat> intron_feat = unit_test_util::AddMiscFeature(nuc_seq);
    intron_feat->SetData().SetImp().SetKey("intron");
    intron_feat->SetLocation().SetInt().SetFrom(0);
    intron_feat->SetLocation().SetInt().SetTo(19);
    intron_feat->SetLocation().SetPartialStart(true, eExtreme_Biological);
    intron_feat->ResetComment();
    CRef<CGb_qual> number_qual(new CGb_qual("number", "2"));
    intron_feat->SetQual().push_back(number_qual);

    // Gene from 0 to the CDS end, partial start, no comment.
    CRef<objects::CSeq_feat> gapdh_gene = unit_test_util::AddMiscFeature(nuc_seq);
    gapdh_gene->SetData().SetGene().SetLocus("GAPDH");
    gapdh_gene->SetLocation().SetInt().SetFrom(0);
    gapdh_gene->SetLocation().SetInt().SetTo(coding->GetLocation().GetInt().GetTo());
    gapdh_gene->SetLocation().SetPartialStart(true, eExtreme_Biological);
    gapdh_gene->ResetComment();

    AddTitle(nuc_seq, "Sebaea microphylla fake protein name (GAPDH) gene, intron 2 and partial cds.");

    // Put the entry into a scope so that a handle is available.
    CRef<CObjectManager> obj_mgr = CObjectManager::GetInstance();
    CRef<CScope> test_scope(new CScope(*obj_mgr));
    CSeq_entry_Handle entry_handle = test_scope->AddTopLevelSeqEntry (*np_set);

    // Feed the sources to autodef and ask it to keep introns.
    objects::CAutoDefWithTaxonomy autodef;
    autodef.AddSources (entry_handle);
    autodef.SetKeepIntrons(true);

    CRef<CAutoDefModifierCombo> mod_combo(autodef.FindBestModifierCombo());

    CheckDeflineMatches(entry_handle, autodef, mod_combo);
}
1471 
1472 
// Test_GB_4078: start-partial CDS with gene "G" plus a stop-partial
// "G-L intergenic spacer" misc feature. The same expected title is checked
// twice: once as built, and once after switching the nucleotide to cRNA.
BOOST_AUTO_TEST_CASE(Test_GB_4078)
{
    CRef<CSeq_entry> np_set = unit_test_util::BuildGoodNucProtSet();
    CRef<CSeq_feat> coding_region = unit_test_util::GetCDSFromGoodNucProtSet(np_set);
    CRef<CSeq_entry> nuc_seq = unit_test_util::GetNucleotideSequenceFromGoodNucProtSet(np_set);
    coding_region->SetLocation().SetPartialStart(true, eExtreme_Biological);

    // Spacer covers three positions beginning at the CDS biological start.
    CRef<CSeq_feat> spacer_feat = unit_test_util::AddMiscFeature(nuc_seq);
    spacer_feat->SetComment("G-L intergenic spacer");
    spacer_feat->SetLocation().SetInt().SetFrom(coding_region->SetLocation().GetStart(eExtreme_Biological));
    spacer_feat->SetLocation().SetInt().SetTo(coding_region->SetLocation().GetStart(eExtreme_Biological) + 2);
    spacer_feat->SetLocation().SetPartialStop(true, eExtreme_Biological);

    // Gene derived from the CDS, with locus "G".
    CRef<CSeq_feat> g_gene = unit_test_util::MakeGeneForFeature(coding_region);
    g_gene->SetData().SetGene().SetLocus("G");
    unit_test_util::AddFeat(g_gene, nuc_seq);

    AddTitle(nuc_seq, "Sebaea microphylla fake protein name (G) gene, partial cds; and G-L intergenic spacer, partial sequence.");

    // Pass 1: sequence as built.
    CheckDeflineMatches(np_set);

    // Pass 2: biomol cRNA on an RNA molecule, same expected title.
    unit_test_util::SetBiomol(nuc_seq, CMolInfo::eBiomol_cRNA);
    nuc_seq->SetSeq().SetInst().SetMol(CSeq_inst::eMol_rna);

    CheckDeflineMatches(np_set);
}
1500 
// Test_SQD_2370: a single misc feature whose comment names an intergenic
// spacer region on an otherwise plain sequence.
BOOST_AUTO_TEST_CASE(Test_SQD_2370)
{
    CRef<CSeq_entry> seq_entry = unit_test_util::BuildGoodSeq();

    CRef<CSeq_feat> spacer_feat = unit_test_util::AddMiscFeature(seq_entry);
    spacer_feat->SetComment("atpB-rbcL intergenic spacer region");

    AddTitle(seq_entry, "Sebaea microphylla atpB-rbcL intergenic spacer region, complete sequence.");
    CheckDeflineMatches(seq_entry);
}
1511 
// Test_GB_4242: taxname "Trichoderma sp. FPZSP372" with isolate "FPZSP37".
// Pass 1 requests the isolate modifier through the vector overload and
// expects a title without it. Pass 2 builds the modifier combo by hand with
// SetAllowModAtEndOfTaxname(true) / SetExcludeSpOrgs(false) and expects the
// isolate to be present.
BOOST_AUTO_TEST_CASE(Test_GB_4242)
{
    CRef<CSeq_entry> seq_entry = unit_test_util::BuildGoodSeq();
    unit_test_util::SetTaxname(seq_entry, "Trichoderma sp. FPZSP372");
    unit_test_util::SetOrgMod(seq_entry, COrgMod::eSubtype_isolate, "FPZSP37");
    AddTitle(seq_entry, "Trichoderma sp. FPZSP372 sequence.");

    vector<CSubSource::ESubtype> requested_subsrcs;
    vector<COrgMod::ESubtype> requested_orgmods(1, COrgMod::eSubtype_isolate);

    CheckDeflineMatches(seq_entry, requested_subsrcs, requested_orgmods);

    // Second pass: explicitly permit a modifier that overlaps the taxname.
    AddTitle(seq_entry, "Trichoderma sp. FPZSP372 isolate FPZSP37 sequence.");

    CRef<CObjectManager> objmgr = CObjectManager::GetInstance();
    CRef<CScope> scope(new CScope(*objmgr));
    CSeq_entry_Handle entry_handle = scope->AddTopLevelSeqEntry(*seq_entry);

    objects::CAutoDefWithTaxonomy autodef;
    autodef.AddSources(entry_handle);

    CRef<CAutoDefModifierCombo> combo(new CAutoDefModifierCombo());
    combo->SetUseModifierLabels(true);
    combo->SetAllowModAtEndOfTaxname(true);
    combo->SetExcludeSpOrgs(false);

    // Feed the same requested modifiers into the hand-built combo.
    ITERATE(vector<CSubSource::ESubtype>, subsrc_it, requested_subsrcs) {
        combo->AddSubsource(*subsrc_it, true);
    }
    ITERATE(vector<COrgMod::ESubtype>, orgmod_it, requested_orgmods) {
        combo->AddOrgMod(*orgmod_it, true);
    }

    autodef.SetFeatureListType(CAutoDefOptions::eListAllFeatures);
    autodef.SetMiscFeatRule(CAutoDefOptions::eDelete);

    CheckDeflineMatches(entry_handle, autodef, combo);
}
1554 
// Test_SQD_3440: options initialised through CAutoDefModifierCombo produce a
// user object of type AutodefOptions, and a user object made after
// SetUseLabels() is checked against the options with CheckAutoDefOptions.
BOOST_AUTO_TEST_CASE(Test_SQD_3440)
{
    CAutoDefOptions autodef_opts;
    CAutoDefModifierCombo modifier_combo;
    modifier_combo.InitOptions(autodef_opts);

    // The generated user object must carry the AutodefOptions object type.
    CRef<CUser_object> user = autodef_opts.MakeUserObject();
    BOOST_CHECK_EQUAL(user->GetObjectType(), CUser_object::eObjectType_AutodefOptions);

    // Regenerate after turning labels on and compare with the options.
    autodef_opts.SetUseLabels();
    user = autodef_opts.MakeUserObject();
    CheckAutoDefOptions(*user, autodef_opts);
}
1567 
1568 
// Test_RemovableuORF: three checks on a nuc-prot set whose protein is "uORF":
//   1. uORF alone             -> title lists the uORF gene;
//   2. plus a satellite repeat -> title lists only the satellite;
//   3. same, SetKeepuORFs(true) -> title lists both.
BOOST_AUTO_TEST_CASE(Test_RemovableuORF)
{
    CRef<CSeq_entry> np_set = BuildNucProtSet("uORF");
    CRef<CSeqdesc> source_desc = AddSource(np_set, "Alcanivorax sp. HA03");
    CRef<CSeq_entry> nuc_seq = unit_test_util::GetNucleotideSequenceFromGoodNucProtSet(np_set);

    // 1. the uORF is the only feature
    AddTitle(nuc_seq, "Alcanivorax sp. HA03 uORF gene, complete cds.");
    CheckDeflineMatches(np_set);

    // 2. add a satellite repeat_region so the uORF has company
    CRef<CSeq_feat> satellite_feat = unit_test_util::AddMiscFeature(nuc_seq);
    satellite_feat->SetData().SetImp().SetKey("repeat_region");
    CRef<CGb_qual> satellite_qual(new CGb_qual("satellite", "x"));
    satellite_feat->SetQual().push_back(satellite_qual);
    AddTitle(nuc_seq, "Alcanivorax sp. HA03 satellite x sequence.");
    CheckDeflineMatches(np_set);

    // 3. explicit autodef with the keep-uORFs flag switched on
    CRef<CObjectManager> objmgr = CObjectManager::GetInstance();
    CRef<CScope> scope(new CScope(*objmgr));
    CSeq_entry_Handle entry_handle = scope->AddTopLevelSeqEntry(*np_set);

    objects::CAutoDefWithTaxonomy autodef;
    autodef.AddSources(entry_handle);

    CRef<CAutoDefModifierCombo> combo(new CAutoDefModifierCombo());

    autodef.SetFeatureListType(CAutoDefOptions::eListAllFeatures);
    autodef.SetMiscFeatRule(CAutoDefOptions::eDelete);
    autodef.SetKeepuORFs(true);

    AddTitle(nuc_seq, "Alcanivorax sp. HA03 uORF gene, complete cds; and satellite x sequence.");
    CheckDeflineMatches(entry_handle, autodef, combo);
}
1608 
// Test_RemovableMobileElement: four checks around a mobile_element feature:
//   1. SINE alone                                   -> SINE is listed;
//   2. SINE plus satellite                          -> only satellite listed;
//   3. same, SetKeepOptionalMobileElements(true)    -> both listed;
//   4. type changed to transposon, flag back to false -> both still listed.
BOOST_AUTO_TEST_CASE(Test_RemovableMobileElement)
{
    // 1. the mobile element is the only feature
    CRef<CSeq_entry> seq_entry = unit_test_util::BuildGoodSeq();
    CRef<CSeq_feat> mobile_feat = unit_test_util::AddMiscFeature(seq_entry);
    mobile_feat->SetData().SetImp().SetKey("mobile_element");
    CRef<CGb_qual> element_type(new CGb_qual("mobile_element_type", "SINE:x"));
    mobile_feat->SetQual().push_back(element_type);
    AddTitle(seq_entry, "Sebaea microphylla SINE x, complete sequence.");

    CheckDeflineMatches(seq_entry);

    // 2. add a satellite at 0..10 and move the mobile element to 15..20
    CRef<CSeq_feat> satellite_feat = unit_test_util::AddMiscFeature(seq_entry);
    satellite_feat->SetData().SetImp().SetKey("repeat_region");
    CRef<CGb_qual> satellite_qual(new CGb_qual("satellite", "y"));
    satellite_feat->SetQual().push_back(satellite_qual);
    satellite_feat->SetLocation().SetInt().SetFrom(0);
    satellite_feat->SetLocation().SetInt().SetTo(10);
    mobile_feat->SetLocation().SetInt().SetFrom(15);
    mobile_feat->SetLocation().SetInt().SetTo(20);
    AddTitle(seq_entry, "Sebaea microphylla satellite y sequence.");
    CheckDeflineMatches(seq_entry);

    // 3. explicit autodef with the keep-optional-mobile-elements flag on
    CRef<CObjectManager> objmgr = CObjectManager::GetInstance();
    CRef<CScope> scope(new CScope(*objmgr));
    CSeq_entry_Handle entry_handle = scope->AddTopLevelSeqEntry(*seq_entry);

    objects::CAutoDefWithTaxonomy autodef;
    autodef.AddSources(entry_handle);

    CRef<CAutoDefModifierCombo> combo(new CAutoDefModifierCombo());

    autodef.SetFeatureListType(CAutoDefOptions::eListAllFeatures);
    autodef.SetMiscFeatRule(CAutoDefOptions::eDelete);
    autodef.SetKeepOptionalMobileElements(true);

    AddTitle(seq_entry, "Sebaea microphylla satellite y sequence; and SINE x, complete sequence.");
    CheckDeflineMatches(entry_handle, autodef, combo);

    // 4. a transposon is expected to stay even with the flag cleared
    element_type->SetVal("transposon:z");
    autodef.SetKeepOptionalMobileElements(false);
    AddTitle(seq_entry, "Sebaea microphylla satellite y sequence; and transposon z, complete sequence.");
    CheckDeflineMatches(entry_handle, autodef, combo);
}
1661 
// GB_5272: a gene that only has a locus_tag (CBU_0065) accompanies a
// start-partial CDS; the expected title shows the locus_tag in parentheses.
BOOST_AUTO_TEST_CASE(GB_5272)
{
    CRef<CSeq_entry> np_set = BuildNucProtSet("rhodanese-related sulfurtransferase");
    CRef<CSeqdesc> source_desc = AddSource(np_set, "Coxiella burnetii");
    CRef<CSeq_entry> nuc_seq = unit_test_util::GetNucleotideSequenceFromGoodNucProtSet(np_set);

    // Gene identified by locus_tag only, attached to the nucleotide.
    CRef<CSeq_feat> tagged_gene(new CSeq_feat());
    tagged_gene->SetData().SetGene().SetLocus_tag("CBU_0065");
    AddFeat(tagged_gene, nuc_seq);

    // Both the CDS and the gene are marked partial at the biological start.
    CRef<CSeq_feat> coding_region = unit_test_util::GetCDSFromGoodNucProtSet(np_set);
    coding_region->SetLocation().SetPartialStart(true, eExtreme_Biological);
    tagged_gene->SetLocation().SetPartialStart(true, eExtreme_Biological);

    AddTitle(nuc_seq, "Coxiella burnetii rhodanese-related sulfurtransferase (CBU_0065) gene, partial cds.");
    CheckDeflineMatches(np_set);
}
1676 
// GB_5272a: two "hypothetical protein" coding regions, each with a gene that
// has only a locus_tag. First check expects both locus_tags in the title;
// second check suppresses gene features and expects the collapsed form.
BOOST_AUTO_TEST_CASE(GB_5272a)
{
    CRef<CSeq_entry> np_set = BuildNucProtSet("hypothetical protein");
    CRef<CSeqdesc> source_desc = AddSource(np_set, "Coxiella burnetii");
    CRef<CSeq_entry> nuc_seq = unit_test_util::GetNucleotideSequenceFromGoodNucProtSet(np_set);
    CRef<CSeq_feat> first_cds = unit_test_util::GetCDSFromGoodNucProtSet(np_set);

    // First gene: CBU_0067, same location as the first CDS.
    CRef<CSeq_feat> first_gene(new CSeq_feat());
    first_gene->SetData().SetGene().SetLocus_tag("CBU_0067");
    AddFeat(first_gene, nuc_seq);
    first_gene->SetLocation().Assign(first_cds->GetLocation());

    // Second CDS (starting at 5) with its own protein "prot2".
    CRef<CSeq_feat> second_cds = unit_test_util::MakeCDSForGoodNucProtSet("nuc", "prot2");
    second_cds->SetLocation().SetInt().SetFrom(5);
    unit_test_util::AddFeat(second_cds, np_set);
    CRef<CSeq_entry> second_prot = unit_test_util::MakeProteinForGoodNucProtSet("prot2");
    np_set->SetSet().SetSeq_set().push_back(second_prot);
    CRef<CSeq_feat> second_prot_feat =
        second_prot->SetSeq().SetAnnot().front()->SetData().SetFtable().front();
    second_prot_feat->SetData().SetProt().SetName().front() = "hypothetical protein";

    // Second gene: CBU_0068, same location as the second CDS.
    CRef<CSeq_feat> second_gene(new CSeq_feat());
    second_gene->SetData().SetGene().SetLocus_tag("CBU_0068");
    AddFeat(second_gene, nuc_seq);
    second_gene->SetLocation().Assign(second_cds->GetLocation());

    AddTitle(nuc_seq, "Coxiella burnetii hypothetical protein (CBU_0067) and hypothetical protein (CBU_0068) genes, complete cds.");
    CheckDeflineMatches(np_set);

    // Repeat with gene features suppressed.
    CRef<CObjectManager> objmgr = CObjectManager::GetInstance();
    CRef<CScope> scope(new CScope(*objmgr));
    CSeq_entry_Handle entry_handle = scope->AddTopLevelSeqEntry(*np_set);

    objects::CAutoDefWithTaxonomy autodef;
    autodef.AddSources(entry_handle);

    CRef<CAutoDefModifierCombo> combo(new CAutoDefModifierCombo());

    autodef.SuppressFeature(CSeqFeatData::eSubtype_gene);

    AddTitle(nuc_seq, "Coxiella burnetii hypothetical protein genes, complete cds.");
    CheckDeflineMatches(entry_handle, autodef, combo);
}
1721 
1722 
// GB_5272b: two "hypothetical protein" coding regions without genes are
// expected to collapse into one clause; adding a "fake protein" CDS that
// starts between them is expected to list all three separately.
BOOST_AUTO_TEST_CASE(GB_5272b)
{
    CRef<CSeq_entry> np_set = BuildNucProtSet("hypothetical protein");
    CRef<CSeqdesc> source_desc = AddSource(np_set, "Coxiella burnetii");
    CRef<CSeq_entry> nuc_seq = unit_test_util::GetNucleotideSequenceFromGoodNucProtSet(np_set);

    // Additional hypothetical-protein CDS starting at 5 (product "prot3").
    CRef<CSeq_feat> hypo_cds = unit_test_util::MakeCDSForGoodNucProtSet("nuc", "prot3");
    hypo_cds->SetLocation().SetInt().SetFrom(5);
    unit_test_util::AddFeat(hypo_cds, np_set);
    CRef<CSeq_entry> hypo_prot = unit_test_util::MakeProteinForGoodNucProtSet("prot3");
    np_set->SetSet().SetSeq_set().push_back(hypo_prot);
    CRef<CSeq_feat> hypo_prot_feat =
        hypo_prot->SetSeq().SetAnnot().front()->SetData().SetFtable().front();
    hypo_prot_feat->SetData().SetProt().SetName().front() = "hypothetical protein";

    AddTitle(nuc_seq, "Coxiella burnetii hypothetical protein genes, complete cds.");
    CheckDeflineMatches(np_set);

    // Now insert a differently named CDS starting at 3 (product "prot2").
    CRef<CSeq_feat> named_cds = unit_test_util::MakeCDSForGoodNucProtSet("nuc", "prot2");
    named_cds->SetLocation().SetInt().SetFrom(3);
    unit_test_util::AddFeat(named_cds, np_set);
    CRef<CSeq_entry> named_prot = unit_test_util::MakeProteinForGoodNucProtSet("prot2");
    np_set->SetSet().SetSeq_set().push_back(named_prot);
    CRef<CSeq_feat> named_prot_feat =
        named_prot->SetSeq().SetAnnot().front()->SetData().SetFtable().front();
    named_prot_feat->SetData().SetProt().SetName().front() = "fake protein";

    AddTitle(nuc_seq, "Coxiella burnetii hypothetical protein, fake protein, and hypothetical protein genes, complete cds.");
    CheckDeflineMatches(np_set);
}
1751 
1752 
// SQD_3462: a doubly partial CDS (ending at 8) with exon 15 over 0..8,
// intron 15 from 9 to the end of the sequence, and a BRM gene. Checked with
// both SetKeepExons(true) and SetKeepIntrons(true) and a labelled isolate
// modifier.
BOOST_AUTO_TEST_CASE(SQD_3462)
{
    CRef<CSeq_entry> np_set = BuildNucProtSet("brahma protein");
    CRef<CSeqdesc> source_desc = AddSource(np_set, "Anas castanea");
    unit_test_util::SetOrgMod(np_set, COrgMod::eSubtype_isolate, "DPIWECT127");

    // CDS ends at 8 and is partial at both biological ends.
    CRef<CSeq_feat> coding_region = unit_test_util::GetCDSFromGoodNucProtSet(np_set);
    coding_region->SetLocation().SetInt().SetTo(8);
    coding_region->SetLocation().SetPartialStart(true, eExtreme_Biological);
    coding_region->SetLocation().SetPartialStop(true, eExtreme_Biological);

    CRef<CSeq_entry> nuc_seq = unit_test_util::GetNucleotideSequenceFromGoodNucProtSet(np_set);

    // Exon number 15 over 0..8.
    CRef<CSeq_feat> exon_feat = unit_test_util::AddMiscFeature(nuc_seq);
    exon_feat->ResetComment();
    exon_feat->SetData().SetImp().SetKey("exon");
    exon_feat->SetLocation().SetInt().SetFrom(0);
    exon_feat->SetLocation().SetInt().SetTo(8);
    CRef<CGb_qual> exon_num_qual(new CGb_qual("number", "15"));
    exon_feat->SetQual().push_back(exon_num_qual);

    // Intron number 15 from 9 through the last base.
    CRef<CSeq_feat> intron_feat = unit_test_util::AddMiscFeature(nuc_seq);
    intron_feat->ResetComment();
    intron_feat->SetData().SetImp().SetKey("intron");
    intron_feat->SetLocation().SetInt().SetFrom(9);
    intron_feat->SetLocation().SetInt().SetTo(nuc_seq->GetSeq().GetLength() - 1);
    CRef<CGb_qual> intron_num_qual(new CGb_qual("number", "15"));
    intron_feat->SetQual().push_back(intron_num_qual);

    // Gene BRM extended through the last base.
    CRef<CSeq_feat> brm_gene = unit_test_util::AddMiscFeature(nuc_seq);
    brm_gene->ResetComment();
    brm_gene->SetData().SetGene().SetLocus("BRM");
    brm_gene->SetLocation().SetInt().SetTo(nuc_seq->GetSeq().GetLength() - 1);

    objects::CAutoDefWithTaxonomy autodef;

    CRef<CObjectManager> objmgr = CObjectManager::GetInstance();
    CRef<CScope> scope(new CScope(*objmgr));
    CSeq_entry_Handle entry_handle = scope->AddTopLevelSeqEntry(*np_set);
    autodef.AddSources(entry_handle);
    autodef.SetKeepExons(true);
    autodef.SetKeepIntrons(true);

    // Isolate modifier, shown with its label.
    CRef<CAutoDefModifierCombo> combo(new CAutoDefModifierCombo());
    combo->AddOrgMod(COrgMod::eSubtype_isolate, true);
    combo->SetUseModifierLabels(true);

    AddTitle(nuc_seq, "Anas castanea isolate DPIWECT127 brahma protein (BRM) gene, exon 15, intron 15, and partial cds.");
    CheckDeflineMatches(entry_handle, autodef, combo);
}
1801 
// Exercises the static CAutoDefModifierCombo::IsModifierInString with the
// modifier value "abc" against several strings. Judging by the cases below,
// the third argument controls whether a match at the end of the string is
// ignored, and only whole-word matches count (argument roles are inferred
// from these cases, not from the declaration -- confirm against the header).
BOOST_AUTO_TEST_CASE(Test_IsModifierInString)
{
    // in the string, but ignore because it's at the end
    BOOST_CHECK_EQUAL(CAutoDefModifierCombo::IsModifierInString("abc", "abc", true), false);
    // in the string, report even at end
    BOOST_CHECK_EQUAL(CAutoDefModifierCombo::IsModifierInString("abc", "abc", false), true);
    // ignore because not whole word (extra character before the match)
    BOOST_CHECK_EQUAL(CAutoDefModifierCombo::IsModifierInString("abc", "tabc", false), false);
    // ignore because not whole word (extra character after the match)
    BOOST_CHECK_EQUAL(CAutoDefModifierCombo::IsModifierInString("abc", "abcq", false), false);
    // skip first match because not whole word, find second match
    BOOST_CHECK_EQUAL(CAutoDefModifierCombo::IsModifierInString("abc", "abcq abc", false), true);


}
1817 
1818 
// Spot-checks the static CAutoDefModifierCombo::IsUsableInDefline for two
// SubSource subtypes and two OrgMod subtypes: one expected usable and one
// expected not usable from each enum.
BOOST_AUTO_TEST_CASE(Test_IsUsableInDefline)
{
    // SubSource subtypes
    BOOST_CHECK_EQUAL(CAutoDefModifierCombo::IsUsableInDefline(CSubSource::eSubtype_plasmid_name), true);
    BOOST_CHECK_EQUAL(CAutoDefModifierCombo::IsUsableInDefline(CSubSource::eSubtype_collected_by), false);
    // OrgMod subtypes
    BOOST_CHECK_EQUAL(CAutoDefModifierCombo::IsUsableInDefline(COrgMod::eSubtype_strain), true);
    BOOST_CHECK_EQUAL(CAutoDefModifierCombo::IsUsableInDefline(COrgMod::eSubtype_variety), false);
}
1826 
1827 
// Test_GB_5493: an RNA feature of type "other" whose product name is
// "trans-spliced leader sequence SL" and whose comment is "mini-exon"; the
// expected title uses the product name.
BOOST_AUTO_TEST_CASE(Test_GB_5493)
{
    CRef<CSeq_entry> seq_entry = unit_test_util::BuildGoodSeq();

    // Turn the misc feature into an "other" RNA with a product name.
    CRef<CSeq_feat> leader_rna = unit_test_util::AddMiscFeature(seq_entry);
    leader_rna->SetData().SetRna().SetType(CRNA_ref::eType_other);
    string unused_remainder;  // out-parameter required by SetRnaProductName
    leader_rna->SetData().SetRna().SetRnaProductName("trans-spliced leader sequence SL", unused_remainder);
    leader_rna->SetComment("mini-exon");

    AddTitle(seq_entry, "Sebaea microphylla trans-spliced leader sequence SL gene, complete sequence.");
    CheckDeflineMatches(seq_entry);
}
1841 
1842 
// Round-trips the targeted locus name through a user object: the value set on
// CAutoDefOptions must be readable back, must appear as the string field
// "Targeted Locus Name" in the generated user object, and must be restored by
// InitFromUserObject after the option has been overwritten.
BOOST_AUTO_TEST_CASE(Test_TargetedLocusName)
{
    CAutoDefOptions options;
    options.SetTargetedLocusName("consensus string");
    BOOST_CHECK_EQUAL(options.GetTargetedLocusName(), "consensus string");
    // Snapshot the options while the name is still "consensus string".
    CRef<CUser_object> user = options.MakeUserObject();
    BOOST_CHECK_EQUAL(HasStringField(*user, "Targeted Locus Name", "consensus string"), 1);

    // Overwrite the name, then reload from the earlier snapshot.
    options.SetTargetedLocusName("other");
    BOOST_CHECK_EQUAL(options.GetTargetedLocusName(), "other");
    options.InitFromUserObject(*user);
    BOOST_CHECK_EQUAL(options.GetTargetedLocusName(), "consensus string");


}
1858 
1859 
// Test_SQD_3602: a doubly partial misc feature on a mitochondrial sequence
// whose "contains ..." comment lists four elements; the expected title marks
// the outer two as partial and the inner two as complete.
BOOST_AUTO_TEST_CASE(Test_SQD_3602)
{
    CRef<CSeq_entry> seq_entry = unit_test_util::BuildGoodSeq();
    unit_test_util::SetGenome(seq_entry, CBioSource::eGenome_mitochondrion);

    CRef<CSeq_feat> contains_feat = unit_test_util::AddMiscFeature(seq_entry);
    contains_feat->SetComment("contains tRNA-Pro gene, control region, tRNA-Phe  gene, and 12S ribosomal RNA gene");
    contains_feat->SetLocation().SetPartialStart(true, eExtreme_Biological);
    contains_feat->SetLocation().SetPartialStop(true, eExtreme_Biological);

    AddTitle(seq_entry, "Sebaea microphylla tRNA-Pro gene, partial sequence; control region and tRNA-Phe gene, complete sequence; and 12S ribosomal RNA gene, partial sequence; mitochondrial.");
    CheckDeflineMatches(seq_entry);
}
1873 
1874 
// Test_SB_5494: a doubly partial misc feature on a mitochondrial sequence
// whose "contains ..." comment lists two rRNA genes around a tRNA gene that
// carries a parenthesised locus; the expected title keeps "(trnV)".
BOOST_AUTO_TEST_CASE(Test_SB_5494)
{
    CRef<CSeq_entry> seq_entry = unit_test_util::BuildGoodSeq();
    unit_test_util::SetGenome(seq_entry, CBioSource::eGenome_mitochondrion);

    CRef<CSeq_feat> contains_feat = unit_test_util::AddMiscFeature(seq_entry);
    contains_feat->SetComment("contains 12S ribosomal RNA gene, tRNA-Val (trnV) gene, and 16S ribosomal RNA gene");
    contains_feat->SetLocation().SetPartialStart(true, eExtreme_Biological);
    contains_feat->SetLocation().SetPartialStop(true, eExtreme_Biological);

    AddTitle(seq_entry, "Sebaea microphylla 12S ribosomal RNA gene, partial sequence; tRNA-Val (trnV) gene, complete sequence; and 16S ribosomal RNA gene, partial sequence; mitochondrial.");
    CheckDeflineMatches(seq_entry);
}
1887 
1888 
// Test_GB_5447: two coding regions that start at the same position, both with
// the protein name "hypothetical protein"; the second one is built by hand
// together with its protein sequence "prot2".
BOOST_AUTO_TEST_CASE(Test_GB_5447)
{
    CRef<CSeq_entry> np_set = unit_test_util::BuildGoodNucProtSet();
    CRef<CSeq_entry> nuc_seq = unit_test_util::GetNucleotideSequenceFromGoodNucProtSet(np_set);
    CRef<CSeq_feat> first_cds = unit_test_util::GetCDSFromGoodNucProtSet(np_set);

    // Rename the existing protein.
    CRef<CSeq_feat> first_prot_feat = unit_test_util::GetProtFeatFromGoodNucProtSet(np_set);
    first_prot_feat->SetData().SetProt().SetName().front() = "hypothetical protein";

    // Second CDS: same positional start as the first, running to the last base.
    CRef<CSeq_feat> second_cds = unit_test_util::AddMiscFeature(nuc_seq);
    second_cds->SetData().SetCdregion();
    second_cds->ResetComment();
    second_cds->SetLocation().SetInt().SetFrom(first_cds->GetLocation().GetStart(eExtreme_Positional));
    second_cds->SetLocation().SetInt().SetTo(nuc_seq->GetSeq().GetInst().GetLength() - 1);

    // Hand-built raw protein sequence of eight residues.
    CRef<CSeq_entry> second_prot(new CSeq_entry());
    second_prot->SetSeq().SetInst().SetMol(objects::CSeq_inst::eMol_aa);
    second_prot->SetSeq().SetInst().SetRepr(objects::CSeq_inst::eRepr_raw);
    second_prot->SetSeq().SetInst().SetSeq_data().SetIupacaa().Set("MPRKTEIN");
    second_prot->SetSeq().SetInst().SetLength(8);

    // Give it the local id "prot2", add it to the set, and link the CDS to it.
    CRef<objects::CSeq_id> second_prot_id(new objects::CSeq_id());
    second_prot_id->SetLocal().SetStr("prot2");
    second_prot->SetSeq().SetId().push_back(second_prot_id);
    np_set->SetSet().SetSeq_set().push_back(second_prot);
    second_cds->SetProduct().SetWhole().SetLocal().SetStr("prot2");

    CRef<CSeq_feat> second_prot_feat = unit_test_util::AddProtFeat(second_prot);
    second_prot_feat->SetData().SetProt().SetName().front() = "hypothetical protein";

    AddTitle(nuc_seq, "Sebaea microphylla hypothetical protein genes, complete cds.");
    CheckDeflineMatches(np_set, true);
}
1920 
1921 
MakeRegulatoryFeatureTest(const string & regulatory_class,const string & defline_interval,bool use_fake_promoters,bool keep_regulatory)1922 void MakeRegulatoryFeatureTest(const string& regulatory_class, const string& defline_interval, bool use_fake_promoters, bool keep_regulatory)
1923 {
1924     objects::CAutoDefWithTaxonomy autodef;
1925     CRef<CAutoDefModifierCombo> mod_combo(new CAutoDefModifierCombo());
1926 
1927     CRef<CObjectManager> object_manager = CObjectManager::GetInstance();
1928     CRef<CScope> scope(new CScope(*object_manager));
1929 
1930     CRef<CSeq_entry> entry = unit_test_util::BuildGoodNucProtSet();
1931     CRef<CSeq_entry> nuc = unit_test_util::GetNucleotideSequenceFromGoodNucProtSet(entry);
1932     if (!NStr::IsBlank(regulatory_class)) {
1933         CRef<objects::CSeq_feat> feat = unit_test_util::AddMiscFeature(entry);
1934         feat->SetData().SetImp().SetKey("regulatory");
1935         CRef<CGb_qual> q(new CGb_qual("regulatory_class", regulatory_class));
1936         feat->SetQual().push_back(q);
1937     }
1938     AddTitle(nuc, "Sebaea microphylla fake protein name gene, " + defline_interval);
1939     CSeq_entry_Handle seh = scope->AddTopLevelSeqEntry(*entry);
1940     autodef.AddSources(seh);
1941     if (use_fake_promoters) {
1942         autodef.SetUseFakePromoters(true);
1943     }
1944     if (keep_regulatory) {
1945         autodef.SetKeepRegulatoryFeatures(true);
1946     }
1947 
1948     CheckDeflineMatches(seh, autodef, mod_combo);
1949     scope->RemoveTopLevelSeqEntry(seh);
1950 
1951 }
1952 
1953 
// GB_5537: runs MakeRegulatoryFeatureTest over combinations of regulatory
// class (none / promoter / enhancer) and the FakePromoter / keep-regulatory
// flags, in the order listed in the table.
BOOST_AUTO_TEST_CASE(GB_5537)
{
    // Columns: regulatory_class ("" means no regulatory feature),
    // expected interval text, use_fake_promoters, keep_regulatory.
    static const struct {
        const char* regulatory_class;
        const char* defline_interval;
        bool        use_fake_promoters;
        bool        keep_regulatory;
    } kCases[] = {
        // a sequence with no promoter, but we set the FakePromoter flag
        { "",         "promoter region and complete cds.", true,  false },
        // a sequence with a promoter, but no flags
        { "promoter", "complete cds.",                     false, false },
        // a sequence with a promoter, set the FakePromoter flag
        { "promoter", "promoter region and complete cds.", true,  false },
        // a sequence with a promoter, set keep regulatory
        { "promoter", "promoter region and complete cds.", false, true  },
        // a sequence with a promoter, set keep regulatory and FakePromoter
        { "promoter", "promoter region and complete cds.", true,  true  },
        // a sequence with an enhancer, but no flags
        { "enhancer", "complete cds.",                     false, false },
        // a sequence with an enhancer, set fake promoters flag
        { "enhancer", "promoter region and complete cds.", true,  false },
        // a sequence with an enhancer, set keep regulatory
        { "enhancer", "enhancer and complete cds.",        false, true  }
    };

    const size_t kNumCases = sizeof(kCases) / sizeof(kCases[0]);
    for (size_t i = 0; i < kNumCases; ++i) {
        MakeRegulatoryFeatureTest(kCases[i].regulatory_class,
                                  kCases[i].defline_interval,
                                  kCases[i].use_fake_promoters,
                                  kCases[i].keep_regulatory);
    }
}
1981 
1982 
// Setting the nuclear copy flag to mitochondrion must show up in the generated
// user object as the string field "NuclearCopyFlag" = "mitochondrion".
BOOST_AUTO_TEST_CASE(Test_AutodefOptionsSpecifyNuclearCopyFlag)
{
    CAutoDefOptions autodef_opts;
    autodef_opts.SetNuclearCopyFlag(CBioSource::eGenome_mitochondrion);

    CRef<CUser_object> user = autodef_opts.MakeUserObject();
    BOOST_CHECK_EQUAL(HasStringField(*user, "NuclearCopyFlag", "mitochondrion"), 1);
}
1992 
1993 
// Test_GB_5560: a comment-less repeat_region with
// rpt_type=long_terminal_repeat is expected to appear as "LTR repeat region".
BOOST_AUTO_TEST_CASE(Test_GB_5560)
{
    CRef<CSeq_entry> seq_entry = unit_test_util::BuildGoodSeq();

    CRef<CSeq_feat> ltr_feat = unit_test_util::AddMiscFeature(seq_entry);
    ltr_feat->ResetComment();
    ltr_feat->SetData().SetImp().SetKey("repeat_region");
    CRef<CGb_qual> rpt_type_qual(new CGb_qual("rpt_type", "long_terminal_repeat"));
    ltr_feat->SetQual().push_back(rpt_type_qual);

    AddTitle(seq_entry, "Sebaea microphylla LTR repeat region.");
    CheckDeflineMatches(seq_entry);
}
2006 
2007 
// Test_GB_5758: an "other" SubSource note of "a; minicircle b; c". The default
// check expects only the minicircle part; requesting the "other" subtype as a
// modifier expects "a" to be included as well.
BOOST_AUTO_TEST_CASE(Test_GB_5758)
{
    CRef<CSeq_entry> seq_entry = unit_test_util::BuildGoodSeq();
    unit_test_util::SetSubSource(seq_entry, CSubSource::eSubtype_other, "a; minicircle b; c");

    // Default modifiers.
    AddTitle(seq_entry, "Sebaea microphylla minicircle b sequence.");
    CheckDeflineMatches(seq_entry);

    // With the "other" SubSource explicitly requested.
    AddTitle(seq_entry, "Sebaea microphylla a minicircle b sequence.");

    vector<CSubSource::ESubtype> requested_subsrcs(1, CSubSource::eSubtype_other);
    vector<COrgMod::ESubtype> requested_orgmods;
    CheckDeflineMatches(seq_entry, requested_subsrcs, requested_orgmods);
}
2022 
2023 
TestForRecomb(CRef<CSeq_entry> entry,const string & expected)2024 void TestForRecomb(CRef<CSeq_entry> entry, const string& expected)
2025 {
2026     AddTitle(entry, expected);
2027     objects::CAutoDefWithTaxonomy autodef;
2028     CRef<CObjectManager> object_manager = CObjectManager::GetInstance();
2029     CRef<CScope> scope(new CScope(*object_manager));
2030     CSeq_entry_Handle seh = scope->AddTopLevelSeqEntry(*entry);
2031     autodef.AddSources(seh);
2032     CRef<CAutoDefModifierCombo> mod_combo(new CAutoDefModifierCombo());
2033     autodef.SetFeatureListType(CAutoDefOptions::eListAllFeatures);
2034     autodef.SetKeepMiscRecomb(true);
2035     CheckDeflineMatches(seh, autodef, mod_combo);
2036 }
2037 
2038 
// Test_GB_5793: a misc_recomb feature is expected to be left out by default,
// shown from its comment when TestForRecomb enables SetKeepMiscRecomb, and
// shown from the recombination_class qualifier once that qualifier exists.
BOOST_AUTO_TEST_CASE(Test_GB_5793)
{
    CRef<CSeq_entry> seq_entry = unit_test_util::BuildGoodSeq();
    CRef<CSeq_feat> recomb_feat = unit_test_util::AddMiscFeature(seq_entry);
    recomb_feat->SetData().SetImp().SetKey("misc_recomb");
    recomb_feat->SetComment("GCC2-ALK translocation breakpoint junction; microhomology");

    // Default settings: the misc_recomb does not appear in the title.
    AddTitle(seq_entry, "Sebaea microphylla sequence.");
    CheckDeflineMatches(seq_entry);

    // Keep-misc_recomb option on: text comes from the comment.
    TestForRecomb(seq_entry, "Sebaea microphylla GCC2-ALK translocation breakpoint junction genomic sequence.");

    // With a recombination_class qualifier present, that value is expected instead.
    CRef<CGb_qual> recomb_class_qual(new CGb_qual("recombination_class", "mitotic_recombination"));
    recomb_feat->SetQual().push_back(recomb_class_qual);
    TestForRecomb(seq_entry, "Sebaea microphylla mitotic_recombination genomic sequence.");
}
2057 
2058 
// Test_GB_5765: with SetCustomFeatureClause("special flower") the expected
// title consists of the organism name followed by that custom clause.
BOOST_AUTO_TEST_CASE(Test_GB_5765)
{
    CRef<CSeq_entry> seq_entry = unit_test_util::BuildGoodSeq();
    // A misc feature is added to the entry; the handle itself is not used again.
    CRef<CSeq_feat> misc_feat = unit_test_util::AddMiscFeature(seq_entry);
    AddTitle(seq_entry, "Sebaea microphylla special flower.");

    objects::CAutoDefWithTaxonomy autodef;

    CRef<CObjectManager> objmgr = CObjectManager::GetInstance();
    CRef<CScope> scope(new CScope(*objmgr));
    CSeq_entry_Handle entry_handle = scope->AddTopLevelSeqEntry(*seq_entry);
    autodef.AddSources(entry_handle);

    CRef<CAutoDefModifierCombo> combo(new CAutoDefModifierCombo());

    autodef.SetFeatureListType(CAutoDefOptions::eListAllFeatures);
    autodef.SetCustomFeatureClause("special flower");

    CheckDeflineMatches(entry_handle, autodef, combo);
}
2074 
2075 
// Test_SQD_3914: a miscRNA whose "contains ..." comment writes tRNA loci
// without a space before the parenthesis ("tRNA-Ile(trnI)"); the expected
// title has the space inserted ("tRNA-Ile (trnI)").
BOOST_AUTO_TEST_CASE(Test_SQD_3914)
{
    CRef<CSeq_entry> seq_entry = unit_test_util::BuildGoodSeq();

    CRef<CSeq_feat> misc_rna = unit_test_util::AddMiscFeature(seq_entry);
    misc_rna->SetData().SetRna().SetType(CRNA_ref::eType_miscRNA);
    misc_rna->SetComment("contains 16S-23S ribosomal RNA intergenic spacer, tRNA-Ile(trnI), and tRNA-Ala(trnA)");

    AddTitle(seq_entry, "Sebaea microphylla 16S-23S ribosomal RNA intergenic spacer, tRNA-Ile (trnI) and tRNA-Ala (trnA) genes, complete sequence.");
    CheckDeflineMatches(seq_entry);
}
2085 
2086 
// Pins the label returned by the static
// CAutoDefAvailableModifier::GetOrgModLabel for each OrgMod subtype listed
// below. Every label equals the enum name without the eSubtype_ prefix except
// culture_collection, which is expected to yield "culture".
BOOST_AUTO_TEST_CASE(Test_CAutoDefAvailableModifier_GetOrgModLabel)
{
    // The one label that differs from its enum name.
    BOOST_CHECK_EQUAL(CAutoDefAvailableModifier::GetOrgModLabel(COrgMod::eSubtype_culture_collection), "culture");
    // Labels identical to the enum name.
    BOOST_CHECK_EQUAL(CAutoDefAvailableModifier::GetOrgModLabel(COrgMod::eSubtype_acronym), "acronym");
    BOOST_CHECK_EQUAL(CAutoDefAvailableModifier::GetOrgModLabel(COrgMod::eSubtype_strain), "strain");
    BOOST_CHECK_EQUAL(CAutoDefAvailableModifier::GetOrgModLabel(COrgMod::eSubtype_substrain), "substrain");
    BOOST_CHECK_EQUAL(CAutoDefAvailableModifier::GetOrgModLabel(COrgMod::eSubtype_type), "type");
    BOOST_CHECK_EQUAL(CAutoDefAvailableModifier::GetOrgModLabel(COrgMod::eSubtype_subtype), "subtype");
    BOOST_CHECK_EQUAL(CAutoDefAvailableModifier::GetOrgModLabel(COrgMod::eSubtype_variety), "variety");
    BOOST_CHECK_EQUAL(CAutoDefAvailableModifier::GetOrgModLabel(COrgMod::eSubtype_serotype), "serotype");
    BOOST_CHECK_EQUAL(CAutoDefAvailableModifier::GetOrgModLabel(COrgMod::eSubtype_serogroup), "serogroup");
    BOOST_CHECK_EQUAL(CAutoDefAvailableModifier::GetOrgModLabel(COrgMod::eSubtype_serovar), "serovar");
    BOOST_CHECK_EQUAL(CAutoDefAvailableModifier::GetOrgModLabel(COrgMod::eSubtype_cultivar), "cultivar");
    BOOST_CHECK_EQUAL(CAutoDefAvailableModifier::GetOrgModLabel(COrgMod::eSubtype_pathovar), "pathovar");
    BOOST_CHECK_EQUAL(CAutoDefAvailableModifier::GetOrgModLabel(COrgMod::eSubtype_chemovar), "chemovar");
    BOOST_CHECK_EQUAL(CAutoDefAvailableModifier::GetOrgModLabel(COrgMod::eSubtype_biovar), "biovar");
    BOOST_CHECK_EQUAL(CAutoDefAvailableModifier::GetOrgModLabel(COrgMod::eSubtype_biotype), "biotype");
    BOOST_CHECK_EQUAL(CAutoDefAvailableModifier::GetOrgModLabel(COrgMod::eSubtype_group), "group");
    BOOST_CHECK_EQUAL(CAutoDefAvailableModifier::GetOrgModLabel(COrgMod::eSubtype_subgroup), "subgroup");
    BOOST_CHECK_EQUAL(CAutoDefAvailableModifier::GetOrgModLabel(COrgMod::eSubtype_isolate), "isolate");
    BOOST_CHECK_EQUAL(CAutoDefAvailableModifier::GetOrgModLabel(COrgMod::eSubtype_authority), "authority");
    BOOST_CHECK_EQUAL(CAutoDefAvailableModifier::GetOrgModLabel(COrgMod::eSubtype_forma), "forma");
    BOOST_CHECK_EQUAL(CAutoDefAvailableModifier::GetOrgModLabel(COrgMod::eSubtype_ecotype), "ecotype");
    BOOST_CHECK_EQUAL(CAutoDefAvailableModifier::GetOrgModLabel(COrgMod::eSubtype_synonym), "synonym");
    BOOST_CHECK_EQUAL(CAutoDefAvailableModifier::GetOrgModLabel(COrgMod::eSubtype_anamorph), "anamorph");
    BOOST_CHECK_EQUAL(CAutoDefAvailableModifier::GetOrgModLabel(COrgMod::eSubtype_teleomorph), "teleomorph");
    BOOST_CHECK_EQUAL(CAutoDefAvailableModifier::GetOrgModLabel(COrgMod::eSubtype_breed), "breed");
}
2115 
2116 
BOOST_AUTO_TEST_CASE(Test_GB_5618)
{
    // Regression test for GB-5618: two misc features at 0..10 and 20..25,
    // each with its own gene, checked with the "keep 3' UTRs" and
    // "keep 5' UTRs" options enabled on the autodef object.
    CRef<CSeq_entry> entry = unit_test_util::BuildGoodSeq();

    CRef<CSeq_feat> utr3 = unit_test_util::AddMiscFeature(entry);
    utr3->SetLocation().SetInt().SetFrom(0);
    utr3->SetLocation().SetInt().SetTo(10);
    CRef<CSeq_feat> gene1 = unit_test_util::MakeGeneForFeature(utr3);
    unit_test_util::AddFeat(gene1, entry);

    CRef<CSeq_feat> utr5 = unit_test_util::AddMiscFeature(entry);
    utr5->SetLocation().SetInt().SetFrom(20);
    utr5->SetLocation().SetInt().SetTo(25);
    CRef<CSeq_feat> gene2 = unit_test_util::MakeGeneForFeature(utr5);
    unit_test_util::AddFeat(gene2, entry);

    // Title stored on the entry; presumably CheckDeflineMatches compares the
    // generated definition line against it (helper is defined elsewhere).
    AddTitle(entry, "Sebaea microphylla gene locus gene, complete sequence.");

    objects::CAutoDefWithTaxonomy autodef;

    CRef<CObjectManager> object_manager = CObjectManager::GetInstance();

    CRef<CScope> scope(new CScope(*object_manager));
    CSeq_entry_Handle seh = scope->AddTopLevelSeqEntry(*entry);
    autodef.AddSources(seh);
    autodef.SetKeep3UTRs(true);
    autodef.SetKeep5UTRs(true);

    CRef<CAutoDefModifierCombo> mod_combo(new CAutoDefModifierCombo());
    mod_combo->AddOrgMod(COrgMod::eSubtype_isolate, true);
    mod_combo->SetUseModifierLabels(true);

    // NOTE(review): an earlier revision assigned a second string ending in
    // "5' UTR and 3' UTR." to a local variable at this point but never
    // passed it to AddTitle, so it had no effect on the test. The dead
    // store was dropped; the title set above is still the one in effect.
    CheckDeflineMatches(seh, autodef, mod_combo);
}
2151 
2152 
BOOST_AUTO_TEST_CASE(Test_GB_6375)
{
    // Case 1: exon added with an empty number and a partial-stop CDS;
    // the expected title does not mention the exon.
    CRef<CSeq_entry> np_set = unit_test_util::BuildGoodNucProtSet();
    CRef<CSeq_entry> nuc_seq = unit_test_util::GetNucleotideSequenceFromGoodNucProtSet(np_set);
    CRef<CSeq_feat> coding = unit_test_util::GetCDSFromGoodNucProtSet(np_set);
    coding->SetLocation().SetPartialStop(true, eExtreme_Biological);
    AddExon(nuc_seq, "", coding->GetLocation().GetStart(eExtreme_Positional));
    AddTitle(nuc_seq, "Sebaea microphylla fake protein name gene, partial cds.");
    CheckDeflineMatches(np_set, true);

    // Case 2: a fresh set whose exon is numbered "1"; the expected title
    // now lists the exon.
    np_set = unit_test_util::BuildGoodNucProtSet();
    nuc_seq = unit_test_util::GetNucleotideSequenceFromGoodNucProtSet(np_set);
    coding = unit_test_util::GetCDSFromGoodNucProtSet(np_set);
    coding->SetLocation().SetPartialStop(true, eExtreme_Biological);
    AddExon(nuc_seq, "1", coding->GetLocation().GetStart(eExtreme_Positional));
    AddTitle(nuc_seq, "Sebaea microphylla fake protein name gene, exon 1 and partial cds.");
    CheckDeflineMatches(np_set, true);

    // Case 3: same set as case 2, but the CDS is made complete again;
    // the expected title drops the exon.
    coding->SetLocation().SetPartialStop(false, eExtreme_Biological);
    AddTitle(nuc_seq, "Sebaea microphylla fake protein name gene, complete cds.");
    CheckDeflineMatches(np_set, true);
}
2182 
2183 
BOOST_AUTO_TEST_CASE(Test_GB_6557)
{
    // The "nuclear gene for X product" suffix is expected for an
    // apicoplast product name but not for a "macronuclear" one.
    CRef<CSeq_entry> np_set = unit_test_util::BuildGoodNucProtSet();
    CRef<CSeq_entry> nuc_seq = unit_test_util::GetNucleotideSequenceFromGoodNucProtSet(np_set);
    CRef<CSeq_feat> prot_feat = unit_test_util::GetProtFeatFromGoodNucProtSet(np_set);

    // macronuclear: no suffix expected
    prot_feat->SetData().SetProt().SetName().front() = "LIA2 macronuclear isoform";
    AddTitle(nuc_seq, "Sebaea microphylla LIA2 macronuclear isoform gene, complete cds.");
    CheckDeflineMatches(np_set, true);

    // apicoplast: suffix expected
    prot_feat->SetData().SetProt().SetName().front() = "LIA2 apicoplast protein";
    AddTitle(nuc_seq, "Sebaea microphylla LIA2 apicoplast protein gene, complete cds; nuclear gene for apicoplast product.");
    CheckDeflineMatches(np_set, true);
}
2203 
2204 
BOOST_AUTO_TEST_CASE(Test_SQD_4185)
{
    // Influenza A nuc-prot set with a segment subsource, a "hemagglutinin"
    // product name and an "HA" gene.
    CRef<CSeq_entry> np_set = unit_test_util::BuildGoodNucProtSet();
    unit_test_util::SetTaxname(np_set, "Influenza A virus (A/USA/RVD1_H1/2011(H1N1))");
    unit_test_util::SetSubSource(np_set, CSubSource::eSubtype_segment, "4");

    CRef<CSeq_entry> nuc_seq = unit_test_util::GetNucleotideSequenceFromGoodNucProtSet(np_set);
    AddTitle(nuc_seq, "Influenza A virus (A/USA/RVD1_H1/2011(H1N1)) segment 4 hemagglutinin (HA) gene, complete cds.");
    unit_test_util::SetNucProtSetProductName(np_set, "hemagglutinin");

    CRef<CSeq_feat> ha_gene(new CSeq_feat());
    ha_gene->SetData().SetGene().SetLocus("HA");
    AddFeat(ha_gene, nuc_seq);

    CheckDeflineMatches(np_set, true);
}
2220 
2221 
BOOST_AUTO_TEST_CASE(Test_GB_6690)
{
    // do not include notes in deflines when calculating uniqueness:
    // neither OrgMod notes nor SubSource notes may end up in the best
    // modifier combination.
    CRef<CSeq_entry> entry = unit_test_util::BuildGoodEcoSet();
    vector<string> notes = { "a", "b", "c" };

    // Pass 1: every member gets a different OrgMod note.
    // NOTE(review): assumes the set has no more members than there are
    // entries in `notes` -- confirm against BuildGoodEcoSet.
    vector<string>::iterator nit = notes.begin();
    NON_CONST_ITERATE(CBioseq_set::TSeq_set, it, entry->SetSet().SetSeq_set()) {
        AddTitle(*it, "Sebaea microphylla sequence.");
        unit_test_util::SetOrgMod(*it, COrgMod::eSubtype_other, *nit);
        ++nit;
    }
    entry->SetSet().ResetDescr();
    AddTitle(entry, "Sebaea microphylla sequence.");

    CRef<CObjectManager> object_manager = CObjectManager::GetInstance();

    CRef<CScope> scope(new CScope(*object_manager));
    CSeq_entry_Handle seh = scope->AddTopLevelSeqEntry(*entry);

    objects::CAutoDef autodef;
    autodef.AddSources(seh);

    CRef<CAutoDefModifierCombo> mod_combo = autodef.FindBestModifierCombo();
    BOOST_CHECK_EQUAL(mod_combo->HasOrgMod(COrgMod::eSubtype_other), false);
    BOOST_CHECK_EQUAL(mod_combo->HasSubSource(CSubSource::eSubtype_other), false);

    CheckDeflineMatches(entry, true);

    scope->RemoveTopLevelSeqEntry(seh);

    // Pass 2: replace the OrgMod note with a SubSource note.
    // Fix: this previously called SetOrgMod with a CSubSource enumerator,
    // so no SubSource note was ever set and the HasSubSource check below
    // was vacuous.
    nit = notes.begin();
    NON_CONST_ITERATE(CBioseq_set::TSeq_set, it, entry->SetSet().SetSeq_set()) {
        // presumably an empty value clears the OrgMod note -- helper is
        // defined elsewhere
        unit_test_util::SetOrgMod(*it, COrgMod::eSubtype_other, "");
        unit_test_util::SetSubSource(*it, CSubSource::eSubtype_other, *nit);
        ++nit;
    }

    seh = scope->AddTopLevelSeqEntry(*entry);
    objects::CAutoDef autodef2;
    autodef2.AddSources(seh);
    // Fix: query the object that saw the modified sources (autodef2);
    // the previous code re-queried `autodef` from pass 1.
    mod_combo = autodef2.FindBestModifierCombo();
    BOOST_CHECK_EQUAL(mod_combo->HasOrgMod(COrgMod::eSubtype_other), false);
    BOOST_CHECK_EQUAL(mod_combo->HasSubSource(CSubSource::eSubtype_other), false);

    CheckDeflineMatches(entry, true);
}
2268 
2269 
MkField(const string & label,const string & val)2270 CRef<CUser_field> MkField(const string& label, const string& val)
2271 {
2272     CRef<CUser_field> f(new CUser_field());
2273     f->SetLabel().SetStr(label);
2274     f->SetData().SetStr(val);
2275     return f;
2276 }
2277 
2278 
BOOST_AUTO_TEST_CASE(Test_HumanSTR)
{
    CRef<CSeq_entry> entry = unit_test_util::BuildGoodSeq();

    // Structured comment with the HumanSTR prefix; fields are appended in
    // the order listed here.
    static const char* const kFields[][2] = {
        { "StructuredCommentPrefix", "##HumanSTR-START##" },
        { "STR locus name",          "TPOX" },
        { "Length-based allele",     "7" },
        { "Bracketed repeat",        "[AATG]7" },
    };
    CRef<CUser_object> str_comment(new CUser_object());
    str_comment->SetType().SetStr("StructuredComment");
    for (const auto& field : kFields) {
        str_comment->SetData().push_back(MkField(field[0], field[1]));
    }

    // Attach a copy of the user object as a descriptor on the sequence.
    CRef<CSeqdesc> user_desc(new CSeqdesc());
    user_desc->SetUser().Assign(*str_comment);
    entry->SetSeq().SetDescr().Set().push_back(user_desc);

    // Variation feature carrying a dbSNP cross-reference.
    CRef<CSeq_feat> variation = unit_test_util::AddMiscFeature(entry);
    variation->SetData().SetImp().SetKey("variation");
    CRef<CDbtag> snp_ref(new CDbtag());
    snp_ref->SetDb("dbSNP");
    snp_ref->SetTag().SetStr("rs115644759");
    variation->SetDbxref().push_back(snp_ref);

    AddTitle(entry, "Sebaea microphylla microsatellite TPOX 7 [AATG]7 rs115644759 sequence.");

    CheckDeflineMatches(entry);
}
2305 
2306 
BOOST_AUTO_TEST_CASE(Test_GB_7071)
{
    // A lone intron feature with the comment "group A"; the expected
    // title is just "<organism> intron."
    CRef<CSeq_entry> entry = unit_test_util::BuildGoodSeq();

    CRef<CSeq_feat> intron_feat = unit_test_util::AddMiscFeature(entry);
    intron_feat->SetData().SetImp().SetKey("intron");
    intron_feat->SetComment("group A");

    AddTitle(entry, "Sebaea microphylla intron.");
    CheckDeflineMatches(entry);
}
2321 
2322 
BOOST_AUTO_TEST_CASE(Test_GB_7479)
{
    CRef<CSeq_entry> entry = unit_test_util::BuildGoodSeq();

    // Coding region: no comment, partial at its biological stop.
    CRef<CSeq_feat> coding = unit_test_util::AddMiscFeature(entry);
    coding->SetData().SetCdregion();
    coding->ResetComment();
    coding->SetLocation().SetPartialStop(true, eExtreme_Biological);

    // Gene described as "cullin 1", also partial at its stop, and tagged
    // with a "pseudogene" qualifier.
    CRef<CSeq_feat> pseudo_gene = unit_test_util::AddMiscFeature(entry);
    pseudo_gene->SetData().SetGene().SetDesc("cullin 1");
    pseudo_gene->ResetComment();
    pseudo_gene->SetLocation().SetPartialStop(true, eExtreme_Biological);
    CRef<CGb_qual> pseudo_qual(new CGb_qual("pseudogene", "allelic"));
    pseudo_gene->SetQual().push_back(pseudo_qual);

    AddTitle(entry, "Sebaea microphylla cullin 1 pseudogene, partial sequence.");

    CheckDeflineMatches(entry);
}
2343 
2344 
CheckInfluenzaDefline(const string & taxname,const string & strain,const string & serotype,const string & clone,const string & segment,const string & defline)2345 void CheckInfluenzaDefline(const string& taxname, const string& strain, const string& serotype, const string& clone, const string& segment, const string& defline)
2346 {
2347     CRef<CSeq_entry> entry = unit_test_util::BuildGoodSeq();
2348     unit_test_util::SetTaxname(entry, taxname);
2349     if (!NStr::IsBlank(strain)) {
2350         unit_test_util::SetOrgMod(entry, COrgMod::eSubtype_strain, strain);
2351     }
2352     if (!NStr::IsBlank(serotype)) {
2353         unit_test_util::SetOrgMod(entry, COrgMod::eSubtype_serotype, serotype);
2354     }
2355     if (!NStr::IsBlank(clone)) {
2356         unit_test_util::SetSubSource(entry, CSubSource::eSubtype_clone, clone);
2357     }
2358     if (!NStr::IsBlank(segment)) {
2359         unit_test_util::SetSubSource(entry, CSubSource::eSubtype_segment, segment);
2360     }
2361 
2362     AddTitle(entry, defline);
2363 
2364     CheckDeflineMatches(entry);
2365 
2366 }
2367 
2368 
BOOST_AUTO_TEST_CASE(Test_GB_7485)
{
    // One row per scenario: source qualifiers in, expected title out.
    // Rows are run in table order through CheckInfluenzaDefline.
    struct SInfluenzaCase {
        const char* taxname;
        const char* strain;
        const char* serotype;
        const char* clone;
        const char* segment;
        const char* defline;
    };

    static const SInfluenzaCase kCases[] = {
        { "Influenza A virus", "",  "",  "",  "",  "Influenza A virus sequence." },
        { "Influenza B virus", "",  "",  "",  "",  "Influenza B virus sequence." },
        { "Influenza A virus", "x", "",  "",  "",  "Influenza A virus (x) sequence." },
        { "Influenza B virus", "x", "",  "",  "",  "Influenza B virus (x) sequence." },
        { "Influenza A virus", "x", "y", "",  "",  "Influenza A virus (x(y)) sequence." },
        { "Influenza B virus", "x", "y", "",  "",  "Influenza B virus (x) sequence." },
        { "Influenza A virus", "",  "y", "",  "",  "Influenza A virus ((y)) sequence." },
        { "Influenza B virus", "",  "y", "",  "",  "Influenza B virus sequence." },
        { "Influenza A virus", "x", "y", "c", "",  "Influenza A virus (x(y)) clone c sequence." },
        { "Influenza B virus", "x", "y", "c", "",  "Influenza B virus (x) clone c sequence." },
        { "Influenza A virus", "x", "y", "",  "1", "Influenza A virus (x(y)) segment 1 sequence." },
        { "Influenza B virus", "x", "y", "",  "1", "Influenza B virus (x) segment 1 sequence." },
        { "Influenza A virus", "x", "y", "c", "1", "Influenza A virus (x(y)) clone c segment 1 sequence." },
        { "Influenza B virus", "x", "y", "c", "1", "Influenza B virus (x) clone c segment 1 sequence." },

        // taxname already carries the parenthesised part
        { "Influenza A virus (x(y))", "x", "y", "c", "1", "Influenza A virus (x(y)) clone c segment 1 sequence." },
        { "Influenza C virus (x)",    "x", "y", "c", "1", "Influenza C virus (x) clone c segment 1 sequence." },
    };

    for (const SInfluenzaCase& c : kCases) {
        CheckInfluenzaDefline(c.taxname, c.strain, c.serotype, c.clone, c.segment, c.defline);
    }
}
2390 
2391 
BOOST_AUTO_TEST_CASE(Test_GB_7534)
{
    // Chloroplast nuc-prot set: a matK coding region plus a trnK gene and
    // an intron xref'd to trnK; autodef is told to keep introns.
    CRef<CSeq_entry> np_set = unit_test_util::BuildGoodNucProtSet();
    unit_test_util::SetTaxname(np_set, "Amomum chryseum");
    unit_test_util::SetGenome(np_set, CBioSource::eGenome_chloroplast);
    CRef<CSeq_feat> prot_feat = unit_test_util::GetProtFeatFromGoodNucProtSet(np_set);
    prot_feat->SetData().SetProt().SetName().front() = "maturase K";

    // matK gene built from the coding region, with a gene xref on the CDS.
    CRef<CSeq_feat> coding = unit_test_util::GetCDSFromGoodNucProtSet(np_set);
    CRef<CSeq_feat> matk_gene = unit_test_util::MakeGeneForFeature(coding);
    matk_gene->SetData().SetGene().SetLocus("matK");
    CRef<CSeq_entry> nuc_seq = unit_test_util::GetNucleotideSequenceFromGoodNucProtSet(np_set);
    AddFeat(matk_gene, nuc_seq);
    coding->SetXref().push_back(CRef<CSeqFeatXref>(new CSeqFeatXref()));
    coding->SetXref().front()->SetData().Assign(matk_gene->GetData());

    // trnK gene: a copy of the matK gene with locus/description replaced.
    CRef<CSeq_feat> trnk_gene(new CSeq_feat());
    trnk_gene->Assign(*matk_gene);
    trnk_gene->SetData().SetGene().SetLocus("trnK");
    trnk_gene->SetData().SetGene().SetDesc("tRNA-Lys");
    AddFeat(trnk_gene, nuc_seq);

    // Intron: a copy of the trnK gene turned into an "intron" feature,
    // with a gene xref back to trnK.
    CRef<CSeq_feat> intron_feat(new CSeq_feat());
    intron_feat->Assign(*trnk_gene);
    intron_feat->SetData().SetImp().SetKey("intron");
    intron_feat->SetXref().push_back(CRef<CSeqFeatXref>(new CSeqFeatXref()));
    intron_feat->SetXref().front()->SetData().Assign(trnk_gene->GetData());
    AddFeat(intron_feat, nuc_seq);

    AddTitle(np_set, "Amomum chryseum tRNA-Lys (trnK) gene, intron; and maturase K (matK) gene, complete cds; chloroplast.");

    CRef<CObjectManager> object_manager = CObjectManager::GetInstance();
    CRef<CScope> scope(new CScope(*object_manager));
    CSeq_entry_Handle seh = scope->AddTopLevelSeqEntry(*np_set);

    // Register the sources, then enable intron retention before the
    // modifier combination is computed.
    objects::CAutoDefWithTaxonomy autodef;
    autodef.AddSources(seh);
    autodef.SetKeepIntrons(true);

    CRef<CAutoDefModifierCombo> mod_combo = autodef.FindBestModifierCombo();

    CheckDeflineMatches(seh, autodef, mod_combo);
}
2440 
2441 
BOOST_AUTO_TEST_CASE(Test_SQD_4451)
{
    CRef<CSeq_entry> entry = BuildSequence();
    // The returned descriptor is not needed by this test.
    AddSource(entry, "Fusarium incarnatum");

    // misc RNA named "internal transcribed spacer region" spanning 0..59,
    // partial at both biological ends. The feature is attached first and
    // its location filled in afterwards.
    CRef<CSeq_feat> its_rna(new CSeq_feat());
    CRNA_ref& rna = its_rna->SetData().SetRna();
    rna.SetType(CRNA_ref::eType_miscRNA);
    rna.SetExt().SetName("internal transcribed spacer region");
    AddFeat(its_rna, entry);

    its_rna->SetLocation().SetInt().SetFrom(0);
    its_rna->SetLocation().SetInt().SetTo(59);
    its_rna->SetLocation().SetPartialStart(true, eExtreme_Biological);
    its_rna->SetLocation().SetPartialStop(true, eExtreme_Biological);

    AddTitle(entry, "Fusarium incarnatum internal transcribed spacer region, partial sequence.");

    CheckDeflineMatches(entry);
}
2459 
BOOST_AUTO_TEST_CASE(Test_SQD_4529)
{
    CRef<CSeq_entry> entry = BuildSequence();
    // The returned descriptor is not needed by this test.
    AddSource(entry, "Fusarium incarnatum");

    // Step 1: a misc feature whose comment reads "similar to beta-tubulin".
    CRef<CSeq_feat> similar_feat = unit_test_util::AddMiscFeature(entry);
    similar_feat->SetComment("similar to beta-tubulin");

    AddTitle(entry, "Fusarium incarnatum beta-tubulin-like gene, complete sequence.");
    CheckDeflineMatches(entry, true, CAutoDefOptions::eListAllFeatures, CAutoDefOptions::eDelete);

    // Step 2: add an rRNA named "foo"; the expected title now names only
    // the rRNA.
    CRef<objects::CSeq_feat> rrna_feat = unit_test_util::AddMiscFeature(entry);
    rrna_feat->ResetComment();
    rrna_feat->SetData().SetRna().SetType(CRNA_ref::eType_rRNA);
    rrna_feat->SetData().SetRna().SetExt().SetName("foo");

    AddTitle(entry, "Fusarium incarnatum foo gene, complete sequence.");
    CheckDeflineMatches(entry, true, CAutoDefOptions::eListAllFeatures, CAutoDefOptions::eDelete);
}
2479 
2480 
AddProtFeat(CRef<CSeq_entry> prot,CProt_ref::EProcessed proc)2481 void AddProtFeat(CRef<CSeq_entry> prot, CProt_ref::EProcessed proc)
2482 {
2483     CRef<CSeq_feat> p = unit_test_util::AddMiscFeature(prot);
2484     p->SetData().SetProt().SetProcessed(proc);
2485     p->SetData().SetProt().SetName().clear();
2486     p->SetData().SetProt().SetName().push_back("RdRp");
2487     p->ResetComment();
2488 }
2489 
2490 
TestMatPeptideListing(bool cds_is_partial,bool has_sig_peptide)2491 void TestMatPeptideListing(bool cds_is_partial, bool has_sig_peptide)
2492 {
2493     CRef<CSeq_entry> entry = unit_test_util::BuildGoodNucProtSet();
2494     CRef<CSeq_feat> cds = unit_test_util::GetCDSFromGoodNucProtSet(entry);
2495     if (cds_is_partial) {
2496         cds->SetLocation().SetPartialStop(true, eExtreme_Biological);
2497         cds->SetPartial(true);
2498     }
2499     CRef<CSeq_feat> gene = unit_test_util::MakeGeneForFeature(cds);
2500     gene->SetData().SetGene().SetLocus("ORF1");
2501     CRef<CSeq_entry> nuc = unit_test_util::GetNucleotideSequenceFromGoodNucProtSet(entry);
2502     unit_test_util::AddFeat(gene, nuc);
2503     CRef<CSeq_feat> pfeat = unit_test_util::GetProtFeatFromGoodNucProtSet(entry);
2504     pfeat->SetData().SetProt().SetName().clear();
2505     pfeat->SetData().SetProt().SetName().push_back("nonstructural polyprotein");
2506     CRef<CSeq_entry> prot = unit_test_util::GetProteinSequenceFromGoodNucProtSet(entry);
2507     AddProtFeat(prot, CProt_ref::eProcessed_mature);
2508     if (has_sig_peptide) {
2509         AddProtFeat(prot, CProt_ref::eProcessed_signal_peptide);
2510     }
2511 
2512     if (cds_is_partial) {
2513         if (has_sig_peptide) {
2514             AddTitle(nuc, "Sebaea microphylla nonstructural polyprotein (ORF1) gene, partial cds.");
2515         } else {
2516             AddTitle(nuc, "Sebaea microphylla nonstructural polyprotein, RdRp region, (ORF1) gene, partial cds.");
2517         }
2518     } else {
2519         AddTitle(nuc, "Sebaea microphylla nonstructural polyprotein (ORF1) gene, complete cds.");
2520     }
2521     CheckDeflineMatches(entry, true, CAutoDefOptions::eListAllFeatures, CAutoDefOptions::eDelete);
2522 
2523 }
2524 
BOOST_AUTO_TEST_CASE(Test_SQD_4593)
{
    // Run every partial/signal-peptide combination, in the order
    // (true,false), (true,true), (false,false), (false,true).
    const bool kPartialStates[] = { true, false };
    const bool kSigPeptideStates[] = { false, true };

    for (bool cds_is_partial : kPartialStates) {
        for (bool has_sig_peptide : kSigPeptideStates) {
            TestMatPeptideListing(cds_is_partial, has_sig_peptide);
        }
    }
}
2532 
2533 
BOOST_AUTO_TEST_CASE(Test_SQD_4607)
{
    // A misc feature whose comment is "contains promoter and 5' UTR";
    // the expected title lists both elements.
    CRef<CSeq_entry> entry = unit_test_util::BuildGoodSeq();
    CRef<CSeq_feat> misc_feat = unit_test_util::AddMiscFeature(entry);
    misc_feat->SetComment("contains promoter and 5' UTR");

    AddTitle(entry, "Sebaea microphylla promoter region and 5' UTR, genomic sequence.");
    CheckDeflineMatches(entry, true, CAutoDefOptions::eListAllFeatures, CAutoDefOptions::eDelete);
}
2545 
2546 
CheckRegulatoryFeatures(const string & expected_title,bool keep_promoters,bool keep_regulatory)2547 void CheckRegulatoryFeatures(const string& expected_title, bool keep_promoters, bool keep_regulatory)
2548 {
2549     CRef<CSeq_entry> entry = unit_test_util::BuildGoodSeq();
2550     CRef<CSeq_feat> promoter = unit_test_util::AddMiscFeature(entry);
2551     promoter->SetData().SetImp().SetKey("regulatory");
2552     promoter->SetQual().push_back(CRef<CGb_qual>(new CGb_qual("regulatory_class", "promoter")));
2553     promoter->ResetComment();
2554     CRef<CSeq_feat> rbs = unit_test_util::AddMiscFeature(entry);
2555     rbs->SetData().SetImp().SetKey("regulatory");
2556     rbs->SetQual().push_back(CRef<CGb_qual>(new CGb_qual("regulatory_class", "ribosome_binding_site")));
2557     rbs->ResetComment();
2558 
2559     CRef<CSeq_feat> gene = unit_test_util::AddMiscFeature(entry);
2560     gene->SetData().SetGene().SetLocus("msa");
2561     gene->SetData().SetGene().SetDesc("mannose-specific adhesin");
2562     gene->ResetComment();
2563 
2564     AddTitle(entry, expected_title);
2565 
2566     CRef<CObjectManager> object_manager = CObjectManager::GetInstance();
2567 
2568     CRef<CScope> scope(new CScope(*object_manager));
2569     CSeq_entry_Handle seh = scope->AddTopLevelSeqEntry(*entry);
2570 
2571     objects::CAutoDefWithTaxonomy autodef;
2572 
2573     // add to autodef
2574     autodef.AddSources(seh);
2575 
2576     CRef<CAutoDefModifierCombo> mod_combo = autodef.FindBestModifierCombo();
2577 
2578     autodef.SetFeatureListType(CAutoDefOptions::eListAllFeatures);
2579     autodef.SetKeepRegulatoryFeatures(keep_regulatory);
2580     autodef.SetUseFakePromoters(keep_promoters);
2581 
2582     CheckDeflineMatches(seh, autodef, mod_combo);
2583 
2584 }
2585 
2586 
BOOST_AUTO_TEST_CASE(Test_SQD_4612)
{
    // The title is the same whether or not fake promoters are requested,
    // as long as regulatory features are not kept.
    const string promoter_only = "Sebaea microphylla mannose-specific adhesin (msa) gene, promoter region.";
    CheckRegulatoryFeatures(promoter_only, false, false);
    CheckRegulatoryFeatures(promoter_only, true, false);

    // Keeping regulatory features adds the ribosome binding site.
    const string with_rbs = "Sebaea microphylla mannose-specific adhesin (msa) gene, promoter region and ribosome_binding_site.";
    CheckRegulatoryFeatures(with_rbs, true, true);
}
2594 
BOOST_AUTO_TEST_CASE(Test_GB_8547)
{
    // Influenza A with a strain and a segment number; the expected title
    // puts the strain in parentheses after the taxname.
    CRef<CSeq_entry> seq = unit_test_util::BuildGoodSeq();
    unit_test_util::SetTaxname(seq, "Influenza A virus");
    unit_test_util::SetOrgMod(seq, COrgMod::eSubtype_strain, "A/Florida/57/2019");
    unit_test_util::SetSubSource(seq, CSubSource::eSubtype_segment, "5");

    AddTitle(seq, "Influenza A virus (A/Florida/57/2019) segment 5 sequence.");
    CheckDeflineMatches(seq);
}
2606 
BOOST_AUTO_TEST_CASE(Test_GB_8604)
{
    // Partial-start coding region for "proannomuricatin G" with matching
    // mRNA and a "PamG" gene.
    CRef<CSeq_entry> np_set = unit_test_util::BuildGoodNucProtSet();
    CRef<CSeq_entry> nuc_seq = unit_test_util::GetNucleotideSequenceFromGoodNucProtSet(np_set);
    CRef<CSeq_feat> coding = unit_test_util::GetCDSFromGoodNucProtSet(np_set);
    coding->SetLocation().SetPartialStart(true, eExtreme_Biological);
    coding->SetPartial(true);

    CRef<CSeq_feat> prot_feat = unit_test_util::GetProtFeatFromGoodNucProtSet(np_set);
    prot_feat->SetData().SetProt().SetName().front() = "proannomuricatin G";

    CRef<CSeq_feat> mrna_feat = unit_test_util::MakemRNAForCDS(coding);
    mrna_feat->SetData().SetRna().SetExt().SetName("proannomuricatin G");
    unit_test_util::AddFeat(mrna_feat, nuc_seq);

    CRef<CSeq_feat> pam_gene = unit_test_util::MakeGeneForFeature(mrna_feat);
    pam_gene->SetData().SetGene().SetLocus("PamG");
    unit_test_util::AddFeat(pam_gene, nuc_seq);

    // Step 1: no mat-peptide yet.
    AddTitle(nuc_seq, "Sebaea microphylla proannomuricatin G (PamG) gene, partial cds.");
    CheckDeflineMatches(np_set);

    // Step 2: add a mature peptide named "annomuricatin G" on the protein.
    CRef<CSeq_entry> prot_seq = unit_test_util::GetProteinSequenceFromGoodNucProtSet(np_set);
    CRef<CSeq_feat> mature = unit_test_util::AddMiscFeature(prot_seq);
    mature->ResetComment();
    mature->SetData().SetProt().SetProcessed(CProt_ref::eProcessed_mature);
    mature->SetData().SetProt().SetName().push_back("annomuricatin G");

    // With mat-peptides suppressed the title from step 1 still applies.
    CheckDeflineMatches(np_set, CSeqFeatData::eSubtype_mat_peptide_aa);

    // Without suppression the mat-peptide region shows up in the title.
    AddTitle(np_set, "Sebaea microphylla proannomuricatin G, annomuricatin G region, (PamG) gene, partial cds.");
    CheckDeflineMatches(np_set);
}
2642 
MakeRegulatoryFeature(const string & reg_class,const string & comment,TSeqPos start_pos,CRef<CSeq_entry> entry)2643 CRef<CSeq_feat> MakeRegulatoryFeature(const string& reg_class, const string& comment, TSeqPos start_pos, CRef<CSeq_entry> entry)
2644 {
2645     CRef<CSeq_feat> reg = unit_test_util::AddMiscFeature(entry);
2646     reg->SetData().SetImp().SetKey("regulatory");
2647     reg->SetComment(comment);
2648     reg->SetQual().push_back(CRef<CGb_qual>(new CGb_qual("regulatory_class", reg_class)));
2649     reg->SetLocation().SetInt().SetFrom(start_pos);
2650     reg->SetLocation().SetInt().SetTo(start_pos + 4);
2651     return reg;
2652 }
2653 
MakeRptRegion(const string & rpt_type,TSeqPos start_pos,CRef<CSeq_entry> entry)2654 CRef<CSeq_feat> MakeRptRegion(const string& rpt_type, TSeqPos start_pos, CRef<CSeq_entry> entry)
2655 {
2656     CRef<CSeq_feat> reg = unit_test_util::AddMiscFeature(entry);
2657     reg->ResetComment();
2658     reg->SetData().SetImp().SetKey("repeat_region");
2659     reg->SetQual().push_back(CRef<CGb_qual>(new CGb_qual("rpt_type", rpt_type)));
2660     reg->SetLocation().SetInt().SetFrom(start_pos);
2661     reg->SetLocation().SetInt().SetTo(start_pos + 4);
2662     return reg;
2663 }
2664 
2665 
TestRepeatRegion(CRef<CSeq_entry> entry)2666 void TestRepeatRegion(CRef<CSeq_entry> entry)
2667 {
2668     CRef<CObjectManager> object_manager = CObjectManager::GetInstance();
2669 
2670     CRef<CScope> scope(new CScope(*object_manager));
2671     CSeq_entry_Handle seh = scope->AddTopLevelSeqEntry(*entry);
2672 
2673     objects::CAutoDefWithTaxonomy autodef;
2674 
2675     // add to autodef
2676     autodef.AddSources(seh);
2677 
2678     CRef<CAutoDefModifierCombo> mod_combo = autodef.FindBestModifierCombo();
2679 
2680     autodef.SetFeatureListType(CAutoDefOptions::eListAllFeatures);
2681     autodef.SetKeepRepeatRegion(true);
2682 
2683     CheckDeflineMatches(seh, autodef, mod_combo);
2684 
2685 }
2686 
BOOST_AUTO_TEST_CASE(Test_GB_8854)
{
    CRef<CSeq_entry> entry = unit_test_util::BuildGoodSeq();

    // Step 1: a lone long-terminal-repeat region at position 15. The
    // returned features are not needed by this test.
    MakeRptRegion("long_terminal_repeat", 15, entry);
    AddTitle(entry, "Sebaea microphylla LTR repeat region.");
    CheckDeflineMatches(entry);
    TestRepeatRegion(entry);

    // Step 2: add three regulatory features at 0, 5 and 10; the title set
    // in step 1 is left in place for the second TestRepeatRegion call.
    MakeRegulatoryFeature("CAAT_signal", "U3 region", 0, entry);
    MakeRegulatoryFeature("TATA_box", "U3 region", 5, entry);
    MakeRegulatoryFeature("polyA_signal_sequence", "R-region", 10, entry);

    TestRepeatRegion(entry);
}
2702 
2703 
BOOST_AUTO_TEST_CASE(Test_ClauseListOptions)
{
    // A feature-less sequence checked once per feature-list option; each
    // option has its own expected title.
    CRef<CSeq_entry> seq = unit_test_util::BuildGoodSeq();

    // complete sequence
    AddTitle(seq, "Sebaea microphylla, complete sequence.");
    CheckDeflineMatches(seq, true, CAutoDefOptions::eCompleteSequence);

    // complete genome
    AddTitle(seq, "Sebaea microphylla, complete genome.");
    CheckDeflineMatches(seq, true, CAutoDefOptions::eCompleteGenome);

    // partial sequence
    AddTitle(seq, "Sebaea microphylla, partial sequence.");
    CheckDeflineMatches(seq, true, CAutoDefOptions::ePartialSequence);

    // partial genome
    AddTitle(seq, "Sebaea microphylla, partial genome.");
    CheckDeflineMatches(seq, true, CAutoDefOptions::ePartialGenome);

    // whole genome shotgun (no comma after the organism name)
    AddTitle(seq, "Sebaea microphylla whole genome shotgun sequence.");
    CheckDeflineMatches(seq, true, CAutoDefOptions::eWholeGenomeShotgunSequence);
}
2723 
2724 
2725 END_SCOPE(objects)
2726 END_NCBI_SCOPE
2727