1 /* $Id: unit_test_autodef.cpp 632623 2021-06-03 17:38:11Z ivanov $
2 * ===========================================================================
3 *
4 * PUBLIC DOMAIN NOTICE
5 * National Center for Biotechnology Information
6 *
7 * This software/database is a "United States Government Work" under the
8 * terms of the United States Copyright Act. It was written as part of
9 * the author's official duties as a United States Government employee and
10 * thus cannot be copyrighted. This software/database is freely available
11 * to the public for use. The National Library of Medicine and the U.S.
12 * Government have not placed any restriction on its use or reproduction.
13 *
14 * Although all reasonable efforts have been taken to ensure the accuracy
15 * and reliability of the software and data, the NLM and the U.S.
16 * Government do not and cannot warrant the performance or results that
17 * may be obtained by using this software or data. The NLM and the U.S.
18 * Government disclaim all warranties, express or implied, including
19 * warranties of performance, merchantability or fitness for any particular
20 * purpose.
21 *
22 * Please cite the author in any work or product based on this material.
23 *
24 * ===========================================================================
25 *
26 * Author: Colleen Bollin, NCBI
27 *
28 * File Description:
29 * Unit tests for AutoDef (definition line generation).
30 *
31 * ===========================================================================
32 */
33
34 #include <ncbi_pch.hpp>
35
36 #include "unit_test_autodef.hpp"
37
38 #include <corelib/ncbi_system.hpp>
39
40 // This macro should be defined before inclusion of test_boost.hpp in all
41 // "*.cpp" files inside executable except one. It is like function main() for
42 // non-Boost.Test executables is defined only in one *.cpp file - other files
43 // should not include it. If NCBI_BOOST_NO_AUTO_TEST_MAIN will not be defined
44 // then test_boost.hpp will define such "main()" function for tests.
45 //
46 // Usually if your unit tests contain only one *.cpp file you should not
47 // care about this macro at all.
48 //
49 //#define NCBI_BOOST_NO_AUTO_TEST_MAIN
50
51
52 // This header must be included before all Boost.Test headers if there are any
53 #include <corelib/test_boost.hpp>
54
55 #include <objects/biblio/Id_pat.hpp>
56 #include <objects/biblio/Title.hpp>
57 #include <objects/general/Object_id.hpp>
58 #include <objects/general/Dbtag.hpp>
59 #include <objects/general/User_object.hpp>
60 #include <objects/medline/Medline_entry.hpp>
61 #include <objects/misc/sequence_macros.hpp>
62 #include <objects/pub/Pub_equiv.hpp>
63 #include <objects/pub/Pub.hpp>
64 #include <objects/seqset/Seq_entry.hpp>
65 #include <objects/seq/GIBB_mol.hpp>
66 #include <objects/seq/Seq_ext.hpp>
67 #include <objects/seq/Delta_ext.hpp>
68 #include <objects/seq/Delta_seq.hpp>
69 #include <objects/seq/Seq_literal.hpp>
70 #include <objects/seq/Ref_ext.hpp>
71 #include <objects/seq/Map_ext.hpp>
72 #include <objects/seq/Seg_ext.hpp>
73 #include <objects/seq/Seq_gap.hpp>
74 #include <objects/seq/Seq_data.hpp>
75 #include <objects/seq/Seq_descr.hpp>
76 #include <objects/seq/Seqdesc.hpp>
77 #include <objects/seq/MolInfo.hpp>
78 #include <objects/seq/Pubdesc.hpp>
79 #include <objects/seq/Seq_hist.hpp>
80 #include <objects/seq/Seq_hist_rec.hpp>
81 #include <objects/seqalign/Dense_seg.hpp>
82 #include <objects/seqblock/GB_block.hpp>
83 #include <objects/seqblock/EMBL_block.hpp>
84 #include <objects/seqfeat/BioSource.hpp>
85 #include <objects/seqfeat/Org_ref.hpp>
86 #include <objects/seqfeat/OrgName.hpp>
87 #include <objects/seqfeat/SubSource.hpp>
88 #include <objects/seqfeat/Imp_feat.hpp>
89 #include <objects/seqfeat/Cdregion.hpp>
90 #include <objects/seqfeat/RNA_ref.hpp>
91 #include <objects/seqfeat/Gb_qual.hpp>
92 #include <objects/seqloc/Seq_id.hpp>
93 #include <objects/seqloc/PDB_seq_id.hpp>
94 #include <objects/seqloc/Giimport_id.hpp>
95 #include <objects/seqloc/Patent_seq_id.hpp>
96 #include <objects/seqloc/Seq_loc.hpp>
97 #include <objects/seqloc/Seq_interval.hpp>
98 #include <objmgr/object_manager.hpp>
99 #include <objmgr/scope.hpp>
100 #include <objmgr/bioseq_ci.hpp>
101 #include <objmgr/feat_ci.hpp>
102 #include <objmgr/seq_vector.hpp>
103 #include <objmgr/util/sequence.hpp>
104 #include <objmgr/seqdesc_ci.hpp>
105 #include <objects/seq/seqport_util.hpp>
106 #include <objtools/data_loaders/genbank/gbloader.hpp>
107 #include <objtools/unit_test_util/unit_test_util.hpp>
108 #include <corelib/ncbiapp.hpp>
109
110 #include <objtools/edit/autodef_with_tax.hpp>
111
112
113 // for writing out tmp files
114 #include <serial/objostrasn.hpp>
115 #include <serial/objostrasnb.hpp>
116
117
118 #include <common/test_assert.h> /* This header must go last */
119
120
121 extern const char* sc_TestEntryCollidingLocusTags;
122
123 BEGIN_NCBI_SCOPE
BEGIN_SCOPE(objects)124 BEGIN_SCOPE(objects)
125
126
127
128
129
130 NCBITEST_INIT_TREE()
131 {
132 if ( !CNcbiApplication::Instance()->GetConfig().HasEntry("NCBI", "Data") ) {
133 }
134 }
135
136 static bool s_debugMode = false;
137
NCBITEST_INIT_CMDLINE(arg_desc)138 NCBITEST_INIT_CMDLINE(arg_desc)
139 {
140 // Here we make descriptions of command line parameters that we are
141 // going to use.
142
143 arg_desc->AddFlag( "debug_mode",
144 "Debugging mode writes errors seen for each test" );
145 }
146
NCBITEST_AUTO_INIT()147 NCBITEST_AUTO_INIT()
148 {
149 // initialization function body
150
151 const CArgs& args = CNcbiApplication::Instance()->GetArgs();
152 if (args["debug_mode"]) {
153 s_debugMode = true;
154 }
155 }
156
157
BuildSequence()158 static CRef<CSeq_entry> BuildSequence()
159 {
160 CRef<CSeq_entry> entry(new CSeq_entry());
161 entry->SetSeq().SetInst().SetMol(CSeq_inst::eMol_dna);
162 entry->SetSeq().SetInst().SetRepr(CSeq_inst::eRepr_raw);
163 entry->SetSeq().SetInst().SetSeq_data().SetIupacna().Set("AATTGGCCAAAATTGGCCAAAATTGGCCAAAATTGGCCAAAATTGGCCAAAATTGGCCAA");
164 entry->SetSeq().SetInst().SetLength(60);
165
166 CRef<CSeq_id> id(new CSeq_id());
167 id->SetLocal().SetStr ("good");
168 entry->SetSeq().SetId().push_back(id);
169
170 CRef<CSeqdesc> mdesc(new CSeqdesc());
171 mdesc->SetMolinfo().SetBiomol(CMolInfo::eBiomol_genomic);
172 entry->SetSeq().SetDescr().Set().push_back(mdesc);
173 return entry;
174 }
175
176
AddSource(CRef<CSeq_entry> entry,string taxname)177 static CRef<CSeqdesc> AddSource (CRef<CSeq_entry> entry, string taxname)
178 {
179 CRef<CSeqdesc> odesc(new CSeqdesc());
180 odesc->SetSource().SetOrg().SetTaxname(taxname);
181
182 if (entry->IsSeq()) {
183 entry->SetSeq().SetDescr().Set().push_back(odesc);
184 } else if (entry->IsSet()) {
185 entry->SetSet().SetDescr().Set().push_back(odesc);
186 }
187 return odesc;
188 }
189
190
AddTitle(CRef<CSeq_entry> entry,string defline)191 static void AddTitle (CRef<CSeq_entry> entry, string defline)
192 {
193 CRef<CSeqdesc> odesc(new CSeqdesc());
194 odesc->SetTitle(defline);
195
196 if (entry->IsSeq()) {
197 bool found = false;
198 if (entry->SetSeq().IsSetDescr()) {
199 NON_CONST_ITERATE(CBioseq::TDescr::Tdata, it, entry->SetSeq().SetDescr().Set()) {
200 if ((*it)->IsTitle()) {
201 (*it)->SetTitle(defline);
202 found = true;
203 }
204 }
205 }
206 if (!found) {
207 entry->SetSeq().SetDescr().Set().push_back(odesc);
208 }
209 } else if (entry->IsSet()) {
210 if (entry->GetSet().IsSetClass() && entry->GetSet().GetClass() == CBioseq_set::eClass_nuc_prot) {
211 AddTitle (entry->SetSet().SetSeq_set().front(), defline);
212 } else {
213 entry->SetSet().SetDescr().Set().push_back(odesc);
214 }
215 }
216 }
217
218
HasBoolField(const CUser_object & user,const string & field_name)219 size_t HasBoolField(const CUser_object& user, const string& field_name)
220 {
221 size_t num_found = 0;
222 ITERATE(CUser_object::TData, it, user.GetData()) {
223 if ((*it)->IsSetLabel() && (*it)->GetLabel().IsStr() &&
224 NStr::EqualNocase((*it)->GetLabel().GetStr(), field_name)) {
225 num_found++;
226 if (!(*it)->IsSetData()) {
227 BOOST_CHECK_EQUAL("Data for " + field_name + "should be set", "Data not set");
228 } else {
229 BOOST_CHECK_EQUAL((*it)->GetData().Which(), CUser_field::TData::e_Bool);
230 if ((*it)->GetData().IsBool()) {
231 BOOST_CHECK_EQUAL((*it)->GetData().GetBool(), true);
232 }
233 }
234 }
235 }
236 return num_found;
237 }
238
HasStringField(const CUser_object & user,const string & field_name,const string & value)239 size_t HasStringField(const CUser_object& user, const string& field_name, const string& value)
240 {
241 size_t num_found = 0;
242 ITERATE(CUser_object::TData, it, user.GetData()) {
243 if ((*it)->IsSetLabel() && (*it)->GetLabel().IsStr() &&
244 NStr::EqualNocase((*it)->GetLabel().GetStr(), field_name)) {
245 num_found++;
246 if (!(*it)->IsSetData()) {
247 BOOST_CHECK_EQUAL("Data for " + field_name + "should be set", "Data not set");
248 } else {
249 BOOST_CHECK_EQUAL((*it)->GetData().Which(), CUser_field::TData::e_Str);
250 if ((*it)->GetData().IsStr()) {
251 BOOST_CHECK_EQUAL((*it)->GetData().GetStr(), value);
252 }
253 }
254 }
255 }
256 return num_found;
257 }
258
HasIntField(const CUser_object & user,const string & field_name,int value)259 size_t HasIntField(const CUser_object& user, const string& field_name, int value)
260 {
261 size_t num_found = 0;
262 ITERATE(CUser_object::TData, it, user.GetData()) {
263 if ((*it)->IsSetLabel() && (*it)->GetLabel().IsStr() &&
264 NStr::EqualNocase((*it)->GetLabel().GetStr(), field_name)) {
265 num_found++;
266 if (!(*it)->IsSetData()) {
267 BOOST_CHECK_EQUAL("Data for " + field_name + "should be set", "Data not set");
268 } else {
269 BOOST_CHECK_EQUAL((*it)->GetData().Which(), CUser_field::TData::e_Int);
270 if ((*it)->GetData().IsInt()) {
271 BOOST_CHECK_EQUAL((*it)->GetData().GetInt(), value);
272 }
273 }
274 }
275 }
276 return num_found;
277 }
278
279
CheckAutoDefOptions(const CUser_object & user,CAutoDefOptions & opts)280 void CheckAutoDefOptions
281 (const CUser_object& user,
282 CAutoDefOptions& opts)
283 {
284 size_t expected_num_fields = 7;
285 if (opts.GetOrgMods().size() > 0 || opts.GetSubSources().size() > 0) {
286 expected_num_fields++;
287 }
288 if (!opts.GetDoNotApplyToSp()) {
289 expected_num_fields--;
290 }
291 if (opts.GetUseLabels()) {
292 expected_num_fields++;
293 }
294 if (opts.GetAllowModAtEndOfTaxname()) {
295 expected_num_fields++;
296 }
297 if (opts.GetUseFakePromoters()) {
298 expected_num_fields ++;
299 }
300 if (opts.GetKeepRegulatoryFeatures()) {
301 expected_num_fields++;
302 }
303 if (opts.GetKeepIntrons()) {
304 expected_num_fields++;
305 }
306 if (opts.GetKeepExons()) {
307 expected_num_fields++;
308 }
309 if (opts.GetKeepuORFs()) {
310 expected_num_fields++;
311 }
312 if (opts.GetKeepMobileElements()) {
313 expected_num_fields++;
314 }
315 if (opts.AreAnyFeaturesSuppressed()) {
316 expected_num_fields++;
317 }
318 if (opts.GetKeepMiscRecomb()) {
319 expected_num_fields++;
320 }
321 if (opts.GetKeep5UTRs()) {
322 expected_num_fields++;
323 }
324 if (opts.GetKeep3UTRs()) {
325 expected_num_fields++;
326 }
327 if (opts.GetKeepRepeatRegion()) {
328 expected_num_fields++;
329 }
330 if (!NStr::IsBlank(opts.GetCustomFeatureClause())) {
331 expected_num_fields++;
332 }
333
334 BOOST_CHECK_EQUAL(user.GetObjectType(), CUser_object::eObjectType_AutodefOptions);
335 BOOST_CHECK_EQUAL(user.GetData().size(), expected_num_fields);
336 BOOST_CHECK_EQUAL(HasBoolField(user, "LeaveParenthetical"), 1);
337 BOOST_CHECK_EQUAL(HasBoolField(user, "SpecifyNuclearProduct"), 1);
338 if (opts.GetUseLabels()) {
339 BOOST_CHECK_EQUAL(HasBoolField(user, "UseLabels"), 1);
340 }
341 if (opts.GetAllowModAtEndOfTaxname()) {
342 BOOST_CHECK_EQUAL(HasBoolField(user, "AllowModAtEndOfTaxname"), 1);
343 }
344 if (opts.GetDoNotApplyToSp()) {
345 BOOST_CHECK_EQUAL(HasBoolField(user, "DoNotApplyToSp"), 1);
346 }
347 if (opts.GetUseFakePromoters()) {
348 BOOST_CHECK_EQUAL(HasBoolField(user, "UseFakePromoters"), 1);
349 }
350 if (opts.GetKeepIntrons()) {
351 BOOST_CHECK_EQUAL(HasBoolField(user, "KeepIntrons"), 1);
352 }
353 if (opts.GetKeepExons()) {
354 BOOST_CHECK_EQUAL(HasBoolField(user, "KeepExons"), 1);
355 }
356 if (opts.GetKeepuORFs()) {
357 BOOST_CHECK_EQUAL(HasBoolField(user, "KeepuORFs"), 1);
358 }
359 BOOST_CHECK_EQUAL(HasStringField(user, "MiscFeatRule", opts.GetMiscFeatRule(opts.GetMiscFeatRule())) , 1);
360 BOOST_CHECK_EQUAL(HasStringField(user, "FeatureListType", opts.GetFeatureListType(opts.GetFeatureListType())), 1);
361 BOOST_CHECK_EQUAL(HasStringField(user, "HIVRule", "WantBoth"), 1);
362 if (!NStr::IsBlank(opts.GetCustomFeatureClause())) {
363 BOOST_CHECK_EQUAL(HasStringField(user, "CustomFeatureClause", opts.GetCustomFeatureClause()), 1);
364 }
365 BOOST_CHECK_EQUAL(HasIntField(user, "MaxMods", -99), 1);
366 if (user.GetData().size() != expected_num_fields) {
367 int field_num = 1;
368 ITERATE(CUser_object::TData, it, user.GetData()) {
369 if (!(*it)->IsSetLabel() || !(*it)->GetLabel().IsStr()) {
370 BOOST_CHECK_EQUAL("Label should be set", "label not set for " + NStr::IntToString(field_num));
371 } else {
372 printf("%s\n", (*it)->GetLabel().GetStr().c_str());
373 }
374 }
375 }
376 }
377
378
CheckDeflineMatches(CSeq_entry_Handle seh,objects::CAutoDefWithTaxonomy & autodef,CRef<CAutoDefModifierCombo> mod_combo)379 static void CheckDeflineMatches(CSeq_entry_Handle seh,
380 objects::CAutoDefWithTaxonomy& autodef,
381 CRef<CAutoDefModifierCombo> mod_combo)
382 {
383 // check defline for each nucleotide sequence
384 CBioseq_CI seq_iter(seh, CSeq_inst::eMol_na);
385 for ( ; seq_iter; ++seq_iter ) {
386 CBioseq_Handle bh (*seq_iter);
387 //Display ID of sequence
388 CConstRef<CSeq_id> id = bh.GetSeqId();
389
390 // original defline
391 string orig_defline = "";
392 CSeqdesc_CI desc_it(bh, CSeqdesc::e_Title, 1);
393 if (desc_it) {
394 orig_defline = desc_it->GetTitle();
395 }
396
397 string new_defline = autodef.GetOneDefLine(mod_combo, bh);
398
399 BOOST_CHECK_EQUAL(orig_defline, new_defline);
400
401 CRef<CUser_object> tmp_user = autodef.GetOptionsObject();
402 CAutoDefOptions opts;
403 opts.InitFromUserObject(*tmp_user);
404 mod_combo->InitOptions(opts);
405 CRef<CUser_object> user = opts.MakeUserObject();
406 CAutoDef autodef2;
407 autodef2.SetOptionsObject(*user);
408 new_defline = autodef2.GetOneDefLine(bh);
409 BOOST_CHECK_EQUAL(orig_defline, new_defline);
410 CheckAutoDefOptions(*user, opts);
411 }
412
413 // check popset title if needed
414
415 if (seh.IsSet() && seh.GetSet().GetCompleteBioseq_set()->NeedsDocsumTitle()) {
416 string orig_defline = "";
417 CSeqdesc_CI desc_it(seh, CSeqdesc::e_Title, 1);
418 if (desc_it) {
419 orig_defline = desc_it->GetTitle();
420 }
421 string new_defline = autodef.GetDocsumDefLine(seh);
422 BOOST_CHECK_EQUAL(orig_defline, new_defline);
423 }
424 }
425
426
CheckDeflineMatches(CRef<CSeq_entry> entry,vector<CSubSource::ESubtype> subsrcs,vector<COrgMod::ESubtype> orgmods)427 static void CheckDeflineMatches(CRef<CSeq_entry> entry,
428 vector<CSubSource::ESubtype> subsrcs,
429 vector<COrgMod::ESubtype> orgmods)
430 {
431 CRef<CObjectManager> object_manager = CObjectManager::GetInstance();
432
433 CRef<CScope> scope(new CScope(*object_manager));
434 CSeq_entry_Handle seh = scope->AddTopLevelSeqEntry (*entry);
435
436 objects::CAutoDefWithTaxonomy autodef;
437
438 // add to autodef
439 autodef.AddSources (seh);
440
441 CRef<CAutoDefModifierCombo> mod_combo;
442 mod_combo = new CAutoDefModifierCombo ();
443 mod_combo->SetUseModifierLabels(true);
444 ITERATE(vector<CSubSource::ESubtype>, it, subsrcs) {
445 mod_combo->AddSubsource(*it, true);
446 }
447 ITERATE(vector<COrgMod::ESubtype>, it, orgmods) {
448 mod_combo->AddOrgMod(*it, true);
449 }
450
451 autodef.SetFeatureListType(CAutoDefOptions::eListAllFeatures);
452 autodef.SetMiscFeatRule(CAutoDefOptions::eDelete);
453
454 CheckDeflineMatches(seh, autodef, mod_combo);
455 }
456
457
CheckDeflineMatches(CRef<CSeq_entry> entry,bool use_best=false,CAutoDefOptions::EFeatureListType list_type=CAutoDefOptions::eListAllFeatures,CAutoDefOptions::EMiscFeatRule misc_feat_rule=CAutoDefOptions::eNoncodingProductFeat)458 static void CheckDeflineMatches(CRef<CSeq_entry> entry, bool use_best = false,
459 CAutoDefOptions::EFeatureListType list_type = CAutoDefOptions::eListAllFeatures,
460 CAutoDefOptions::EMiscFeatRule misc_feat_rule = CAutoDefOptions::eNoncodingProductFeat)
461 {
462 CRef<CObjectManager> object_manager = CObjectManager::GetInstance();
463
464 CRef<CScope> scope(new CScope(*object_manager));
465 CSeq_entry_Handle seh = scope->AddTopLevelSeqEntry (*entry);
466
467 objects::CAutoDefWithTaxonomy autodef;
468
469 // add to autodef
470 autodef.AddSources (seh);
471
472 CRef<CAutoDefModifierCombo> mod_combo;
473 if (use_best) {
474 mod_combo = autodef.FindBestModifierCombo();
475 } else {
476 mod_combo = new CAutoDefModifierCombo ();
477 }
478
479 autodef.SetFeatureListType(list_type);
480 autodef.SetMiscFeatRule(misc_feat_rule);
481
482 CheckDeflineMatches(seh, autodef, mod_combo);
483 }
484
485
CheckDeflineMatches(CRef<CSeq_entry> entry,CSeqFeatData::ESubtype feat_to_suppress)486 static void CheckDeflineMatches(CRef<CSeq_entry> entry, CSeqFeatData::ESubtype feat_to_suppress)
487 {
488 CRef<CObjectManager> object_manager = CObjectManager::GetInstance();
489
490 CRef<CScope> scope(new CScope(*object_manager));
491 CSeq_entry_Handle seh = scope->AddTopLevelSeqEntry(*entry);
492
493 objects::CAutoDefWithTaxonomy autodef;
494
495 // add to autodef
496 autodef.AddSources(seh);
497
498 CRef<CAutoDefModifierCombo> mod_combo = autodef.FindBestModifierCombo();
499
500 autodef.SuppressFeature(feat_to_suppress);
501
502 autodef.SetFeatureListType(CAutoDefOptions::eListAllFeatures);
503 autodef.SetMiscFeatRule(CAutoDefOptions::eNoncodingProductFeat);
504
505 CheckDeflineMatches(seh, autodef, mod_combo);
506 }
507
508
FindNucInSeqEntry(CRef<CSeq_entry> entry)509 CRef<CSeq_entry> FindNucInSeqEntry(CRef<CSeq_entry> entry)
510 {
511 CRef<CSeq_entry> empty(NULL);
512 if (!entry) {
513 return empty;
514 } else if (entry->IsSeq() && entry->GetSeq().IsNa()) {
515 return entry;
516 } else if (entry->IsSet()) {
517 ITERATE(CBioseq_set::TSeq_set, it, entry->GetSet().GetSeq_set()) {
518 CRef<CSeq_entry> rval = FindNucInSeqEntry(*it);
519 if (rval) {
520 return rval;
521 }
522 }
523 }
524 return empty;
525 }
526
527
AddFeat(CRef<CSeq_feat> feat,CRef<CSeq_entry> entry)528 static void AddFeat (CRef<CSeq_feat> feat, CRef<CSeq_entry> entry)
529 {
530 CRef<CSeq_annot> annot;
531
532 if (entry->IsSeq()) {
533 if (!entry->GetSeq().IsSetAnnot()
534 || !entry->GetSeq().GetAnnot().front()->IsFtable()) {
535 CRef<CSeq_annot> new_annot(new CSeq_annot());
536 entry->SetSeq().SetAnnot().push_back(new_annot);
537 annot = new_annot;
538 } else {
539 annot = entry->SetSeq().SetAnnot().front();
540 }
541 } else if (entry->IsSet()) {
542 if (!entry->GetSet().IsSetAnnot()
543 || !entry->GetSet().GetAnnot().front()->IsFtable()) {
544 CRef<CSeq_annot> new_annot(new CSeq_annot());
545 entry->SetSet().SetAnnot().push_back(new_annot);
546 annot = new_annot;
547 } else {
548 annot = entry->SetSet().SetAnnot().front();
549 }
550 }
551
552 if (!feat->IsSetLocation() || feat->GetLocation().Which() == CSeq_loc::e_not_set) {
553 CRef<CSeq_entry> nuc_entry = FindNucInSeqEntry(entry);
554 if (nuc_entry) {
555 CRef<CSeq_id> id(new CSeq_id());
556 id->Assign(*(nuc_entry->GetSeq().GetId().front()));
557 feat->SetLocation().SetInt().SetId(*id);
558 feat->SetLocation().SetInt().SetFrom(0);
559 feat->SetLocation().SetInt().SetTo(entry->GetSeq().GetLength() - 1);
560 }
561 }
562
563 annot->SetData().SetFtable().push_back(feat);
564 }
565
566
MakeProteinForNucProtSet(string id,string protein_name)567 static CRef<CSeq_entry> MakeProteinForNucProtSet (string id, string protein_name)
568 {
569 // make protein
570 CRef<CBioseq> pseq(new CBioseq());
571 pseq->SetInst().SetMol(CSeq_inst::eMol_aa);
572 pseq->SetInst().SetRepr(CSeq_inst::eRepr_raw);
573 pseq->SetInst().SetSeq_data().SetIupacaa().Set("MPRKTEIN");
574 pseq->SetInst().SetLength(8);
575
576 CRef<CSeq_id> pid(new CSeq_id());
577 pid->SetLocal().SetStr (id);
578 pseq->SetId().push_back(pid);
579
580 CRef<CSeqdesc> mpdesc(new CSeqdesc());
581 mpdesc->SetMolinfo().SetBiomol(CMolInfo::eBiomol_peptide);
582 pseq->SetDescr().Set().push_back(mpdesc);
583
584 CRef<CSeq_entry> pentry(new CSeq_entry());
585 pentry->SetSeq(*pseq);
586
587 CRef<CSeq_feat> feat (new CSeq_feat());
588 feat->SetData().SetProt().SetName().push_back(protein_name);
589 feat->SetLocation().SetInt().SetId().SetLocal().SetStr(id);
590 feat->SetLocation().SetInt().SetFrom(0);
591 feat->SetLocation().SetInt().SetTo(7);
592 AddFeat (feat, pentry);
593
594 return pentry;
595 }
596
597
MakeCDSForNucProtSet(string nuc_id,string prot_id)598 static CRef<CSeq_feat> MakeCDSForNucProtSet (string nuc_id, string prot_id)
599 {
600 CRef<CSeq_feat> cds (new CSeq_feat());
601 cds->SetData().SetCdregion();
602 cds->SetProduct().SetWhole().SetLocal().SetStr(prot_id);
603 cds->SetLocation().SetInt().SetId().SetLocal().SetStr(nuc_id);
604 cds->SetLocation().SetInt().SetFrom(0);
605 cds->SetLocation().SetInt().SetTo(26);
606 return cds;
607 }
608
609
MakeGeneForNucProtSet(const string & nuc_id,const string & locus,const string & allele=kEmptyStr)610 static CRef<CSeq_feat> MakeGeneForNucProtSet(const string& nuc_id, const string& locus, const string& allele = kEmptyStr)
611 {
612 CRef<CSeq_feat> gene(new CSeq_feat());
613 gene->SetData().SetGene().SetLocus(locus);
614 if (!allele.empty()) {
615 gene->SetData().SetGene().SetAllele(allele);
616 }
617 gene->SetLocation().SetInt().SetId().SetLocal().SetStr(nuc_id);
618 gene->SetLocation().SetInt().SetFrom(0);
619 gene->SetLocation().SetInt().SetTo(26);
620 return gene;
621 }
622
623
BuildNucProtSet(const string & protein_name,const string & locus=kEmptyStr,const string & allele=kEmptyStr)624 static CRef<CSeq_entry> BuildNucProtSet(const string& protein_name, const string& locus = kEmptyStr, const string& allele = kEmptyStr)
625 {
626 CRef<CBioseq_set> set(new CBioseq_set());
627 set->SetClass(CBioseq_set::eClass_nuc_prot);
628
629 // make nucleotide
630 CRef<CBioseq> nseq(new CBioseq());
631 nseq->SetInst().SetMol(CSeq_inst::eMol_dna);
632 nseq->SetInst().SetRepr(CSeq_inst::eRepr_raw);
633 nseq->SetInst().SetSeq_data().SetIupacna().Set("ATGCCCAGAAAAACAGAGATAAACTAAGGGATGCCCAGAAAAACAGAGATAAACTAAGGG");
634 nseq->SetInst().SetLength(60);
635
636 CRef<CSeq_id> id(new CSeq_id());
637 id->SetLocal().SetStr ("nuc");
638 nseq->SetId().push_back(id);
639
640 CRef<CSeqdesc> mdesc(new CSeqdesc());
641 mdesc->SetMolinfo().SetBiomol(CMolInfo::eBiomol_genomic);
642 nseq->SetDescr().Set().push_back(mdesc);
643
644 CRef<CSeq_entry> nentry(new CSeq_entry());
645 nentry->SetSeq(*nseq);
646
647 if (!locus.empty()) {
648 CRef<CSeq_feat> gene = MakeGeneForNucProtSet("nuc", locus, allele);
649 AddFeat(gene, nentry);
650 }
651
652 set->SetSeq_set().push_back(nentry);
653
654 // make protein
655 CRef<CSeq_entry> pentry = MakeProteinForNucProtSet("prot", protein_name);
656
657 set->SetSeq_set().push_back(pentry);
658
659 CRef<CSeq_entry> set_entry(new CSeq_entry());
660 set_entry->SetSet(*set);
661
662 CRef<CSeq_feat> cds = MakeCDSForNucProtSet("nuc", "prot");
663 AddFeat (cds, set_entry);
664
665 return set_entry;
666 }
667
668
669 // tests
670
// A sequence with only a source descriptor and no features.
BOOST_AUTO_TEST_CASE(Test_SimpleAutodef)
{
    CRef<CSeq_entry> seq_entry = BuildSequence();
    AddSource(seq_entry, "Homo sapiens");
    AddTitle(seq_entry, "Homo sapiens sequence.");

    CheckDeflineMatches(seq_entry);
}
680
681
// A plasmid source whose plasmid-name is "unnamed": the expected title
// mentions "plasmid" without a name.
BOOST_AUTO_TEST_CASE(Test_UnnamedPlasmid)
{
    CRef<CSeq_entry> seq_entry = BuildSequence();
    CRef<CSeqdesc> source_desc = AddSource(seq_entry, "Alcanivorax sp. HA03");

    CBioSource& biosource = source_desc->SetSource();
    biosource.SetGenome(CBioSource::eGenome_plasmid);
    CRef<CSubSource> plasmid_name(new CSubSource("plasmid-name", "unnamed"));
    biosource.SetSubtype().push_back(plasmid_name);

    AddTitle(seq_entry, "Alcanivorax sp. HA03 plasmid sequence.");

    CheckDeflineMatches(seq_entry);
}
694
695
// A nuc-prot set on an unnamed plasmid with a single coding region.
BOOST_AUTO_TEST_CASE(Test_SQD_476)
{
    CRef<CSeq_entry> nuc_prot = BuildNucProtSet("chlorocatechol 1,2-dioxygenase");
    CRef<CSeqdesc> source_desc = AddSource(nuc_prot, "Alcanivorax sp. HA03");

    CBioSource& biosource = source_desc->SetSource();
    biosource.SetGenome(CBioSource::eGenome_plasmid);
    CRef<CSubSource> plasmid_name(new CSubSource("plasmid-name", "unnamed"));
    biosource.SetSubtype().push_back(plasmid_name);

    AddTitle(nuc_prot, "Alcanivorax sp. HA03 plasmid chlorocatechol 1,2-dioxygenase gene, complete cds.");

    CheckDeflineMatches(nuc_prot);
}
707
708
// A repeat_region feature qualified as a microsatellite; the expected title
// is the same with and without a feature comment.
BOOST_AUTO_TEST_CASE(Test_SQD_630)
{
    CRef<CSeq_entry> seq_entry = BuildSequence();
    CRef<CSeqdesc> source_desc = AddSource(seq_entry, "Clathrina aurea");
    CRef<CSubSource> clone(new CSubSource("clone", "Cau_E6"));
    source_desc->SetSource().SetSubtype().push_back(clone);

    CRef<CSeq_feat> repeat(new CSeq_feat());
    repeat->SetData().SetImp().SetKey("repeat_region");
    CRef<CGb_qual> satellite(new CGb_qual("satellite", "microsatellite"));
    repeat->SetQual().push_back(satellite);
    AddFeat(repeat, seq_entry);

    AddTitle(seq_entry, "Clathrina aurea microsatellite sequence.");

    // without a comment
    CheckDeflineMatches(seq_entry);

    // with a comment on the feature
    repeat->SetComment("dinucleotide");
    CheckDeflineMatches(seq_entry);
}
728
729
// A misc_feature whose comment starts with "contains" and names a ribosomal
// RNA and a nontranscribed spacer.
BOOST_AUTO_TEST_CASE(Test_SQD_169)
{
    CRef<CSeq_entry> seq_entry = BuildSequence();
    CRef<CSeqdesc> source_desc = AddSource(seq_entry, "Clathrina aurea");

    CRef<CSeq_feat> misc(new CSeq_feat());
    misc->SetData().SetImp().SetKey("misc_feature");
    misc->SetComment("contains 5S ribosomal RNA and nontranscribed spacer");
    AddFeat(misc, seq_entry);

    AddTitle(seq_entry, "Clathrina aurea 5S ribosomal RNA gene region.");

    CheckDeflineMatches(seq_entry);
}
743
744
// A misc_feature, partial at both ends, whose comment lists two genes and an
// intergenic spacer.
BOOST_AUTO_TEST_CASE(Test_SQD_374)
{
    CRef<CSeq_entry> seq_entry = BuildSequence();
    CRef<CSeqdesc> source_desc = AddSource(seq_entry, "Clathrina aurea");

    CRef<CSeq_feat> misc(new CSeq_feat());
    misc->SetData().SetImp().SetKey("misc_feature");
    misc->SetComment("contains DNA lyase (Apn2) gene, Apn2-Mat1 intergenic spacer, and mating type protein (Mat1) gene");
    AddFeat(misc, seq_entry);

    // AddFeat supplied the location; now mark both ends partial
    CSeq_loc& location = misc->SetLocation();
    location.SetPartialStart(true, eExtreme_Biological);
    location.SetPartialStop(true, eExtreme_Biological);

    AddTitle(seq_entry, "Clathrina aurea DNA lyase (Apn2) gene, partial sequence; Apn2-Mat1 intergenic spacer, complete sequence; and mating type protein (Mat1) gene, partial sequence.");

    CheckDeflineMatches(seq_entry);
}
760
761
// A misc_feature whose comment only describes the primers used; the expected
// title does not mention the feature.
BOOST_AUTO_TEST_CASE(Test_SQD_155)
{
    CRef<CSeq_entry> seq_entry = BuildSequence();
    CRef<CSeqdesc> source_desc = AddSource(seq_entry, "Clathrina aurea");

    CRef<CSeq_feat> misc(new CSeq_feat());
    misc->SetData().SetImp().SetKey("misc_feature");
    misc->SetComment("amplified with primers designed for 16S ribosomal RNA");
    AddFeat(misc, seq_entry);

    AddTitle(seq_entry, "Clathrina aurea sequence.");

    CheckDeflineMatches(seq_entry);
}
775
// A pop-set of three nuc-prot sets from the same organism that differ only in
// their isolate. Each member title includes the isolate; the set title does
// not.
BOOST_AUTO_TEST_CASE(Test_DocsumTitle_Popset)
{
    // first member
    CRef<CSeq_entry> member1 = unit_test_util::BuildGoodNucProtSet();
    unit_test_util::SetTaxname(member1, "Pinus cembra");
    // clear previous taxid before setting new one
    unit_test_util::SetTaxon(member1, 0);
    unit_test_util::SetTaxon(member1, 58041);
    unit_test_util::SetOrgMod(member1, COrgMod::eSubtype_isolate, "AcesapD07");
    AddTitle(unit_test_util::GetNucleotideSequenceFromGoodNucProtSet(member1),
             "Pinus cembra AcesapD07 fake protein name gene, complete cds.");

    // second member
    CRef<CSeq_entry> member2 = unit_test_util::BuildGoodNucProtSet();
    unit_test_util::ChangeId(member2, "2");
    unit_test_util::SetTaxname(member2, "Pinus cembra");
    // clear previous taxid before setting new one
    unit_test_util::SetTaxon(member2, 0);
    unit_test_util::SetTaxon(member2, 58041);
    unit_test_util::SetOrgMod(member2, COrgMod::eSubtype_isolate, "AcesapD12");
    AddTitle(unit_test_util::GetNucleotideSequenceFromGoodNucProtSet(member2),
             "Pinus cembra AcesapD12 fake protein name gene, complete cds.");

    // third member
    CRef<CSeq_entry> member3 = unit_test_util::BuildGoodNucProtSet();
    unit_test_util::ChangeId(member3, "3");
    unit_test_util::SetTaxname(member3, "Pinus cembra");
    // clear previous taxid before setting new one
    unit_test_util::SetTaxon(member3, 0);
    unit_test_util::SetTaxon(member3, 58041);
    unit_test_util::SetOrgMod(member3, COrgMod::eSubtype_isolate, "AcesapD33");
    AddTitle(unit_test_util::GetNucleotideSequenceFromGoodNucProtSet(member3),
             "Pinus cembra AcesapD33 fake protein name gene, complete cds.");

    // enclosing pop-set
    CRef<CSeq_entry> popset(new CSeq_entry());
    CBioseq_set& bioseq_set = popset->SetSet();
    bioseq_set.SetClass(CBioseq_set::eClass_pop_set);
    bioseq_set.SetSeq_set().push_back(member1);
    bioseq_set.SetSeq_set().push_back(member2);
    bioseq_set.SetSeq_set().push_back(member3);
    AddTitle(popset, "Pinus cembra fake protein name gene, complete cds.");

    CheckDeflineMatches(popset, true);
}
817
818
// A set of two nuc-prot sets from different Bembidion species. Each member
// title names its own species; the expected set title starts with
// "Chilioperyphus".
// NOTE(review): the case name says Physet, but the set class used below is
// eClass_pop_set -- confirm whether eClass_phy_set was intended.
BOOST_AUTO_TEST_CASE(Test_DocsumTitle_Physet)
{
    // first member
    CRef<CSeq_entry> member1 = unit_test_util::BuildGoodNucProtSet();
    unit_test_util::SetTaxname(member1, "Bembidion mendocinum");
    // clear previous taxid before setting new one
    unit_test_util::SetTaxon(member1, 0);
    unit_test_util::SetTaxon(member1, 1353850);
    AddTitle(unit_test_util::GetNucleotideSequenceFromGoodNucProtSet(member1),
             "Bembidion mendocinum fake protein name gene, complete cds.");

    // second member
    CRef<CSeq_entry> member2 = unit_test_util::BuildGoodNucProtSet();
    unit_test_util::ChangeId(member2, "2");
    unit_test_util::SetTaxname(member2, "Bembidion orregoi");
    // clear previous taxid before setting new one
    unit_test_util::SetTaxon(member2, 0);
    unit_test_util::SetTaxon(member2, 1353851);
    AddTitle(unit_test_util::GetNucleotideSequenceFromGoodNucProtSet(member2),
             "Bembidion orregoi fake protein name gene, complete cds.");

    // enclosing set
    CRef<CSeq_entry> enclosing(new CSeq_entry());
    CBioseq_set& bioseq_set = enclosing->SetSet();
    bioseq_set.SetClass(CBioseq_set::eClass_pop_set);
    bioseq_set.SetSeq_set().push_back(member1);
    bioseq_set.SetSeq_set().push_back(member2);
    AddTitle(enclosing, "Chilioperyphus fake protein name gene, complete cds.");

    CheckDeflineMatches(enclosing, true);
}
846
847
// Three adjacent RNA features: a 5' partial 5.8S rRNA (0..19), an internal
// transcribed spacer (20..39) and a 3' partial 28S rRNA (40..59). The same
// title is expected whether the spacer is typed miscRNA or other.
BOOST_AUTO_TEST_CASE(Test_GB_3108)
{
    CRef<CSeq_entry> seq_entry = BuildSequence();
    CRef<CSeqdesc> source_desc = AddSource(seq_entry, "Fusarium incarnatum");

    // 5.8S rRNA, partial at the start
    CRef<CSeq_feat> rrna_58s(new CSeq_feat());
    rrna_58s->SetData().SetRna().SetType(CRNA_ref::eType_rRNA);
    rrna_58s->SetData().SetRna().SetExt().SetName("5.8S ribosomal RNA");
    AddFeat(rrna_58s, seq_entry);
    rrna_58s->SetLocation().SetInt().SetTo(19);
    rrna_58s->SetLocation().SetPartialStart(true, eExtreme_Biological);

    // internal transcribed spacer 2
    CRef<CSeq_feat> its2(new CSeq_feat());
    its2->SetData().SetRna().SetType(CRNA_ref::eType_miscRNA);
    its2->SetData().SetRna().SetExt().SetName("internal transcribed spacer 2");
    AddFeat(its2, seq_entry);
    its2->SetLocation().SetInt().SetFrom(20);
    its2->SetLocation().SetInt().SetTo(39);

    // 28S rRNA, partial at the end
    CRef<CSeq_feat> rrna_28s(new CSeq_feat());
    rrna_28s->SetData().SetRna().SetType(CRNA_ref::eType_rRNA);
    rrna_28s->SetData().SetRna().SetExt().SetName("28S ribosomal RNA");
    AddFeat(rrna_28s, seq_entry);
    rrna_28s->SetLocation().SetInt().SetFrom(40);
    rrna_28s->SetLocation().SetInt().SetTo(59);
    rrna_28s->SetLocation().SetPartialStop(true, eExtreme_Biological);

    AddTitle(seq_entry, "Fusarium incarnatum 5.8S ribosomal RNA gene, partial sequence; internal transcribed spacer 2, complete sequence; and 28S ribosomal RNA gene, partial sequence.");

    CheckDeflineMatches(seq_entry);

    // repeat with the spacer typed as "other"
    its2->SetData().SetRna().SetType(CRNA_ref::eType_other);
    CheckDeflineMatches(seq_entry);
}
881
882
// Test_GB_3099: nuc-prot set whose taxname itself contains parentheses.
// A gene with locus "HA" is added next to the "hemagglutinin" product and the
// expected title is checked with CheckDeflineMatches.
BOOST_AUTO_TEST_CASE(Test_GB_3099)
{
    CRef<CSeq_entry> nps = unit_test_util::BuildGoodNucProtSet();
    unit_test_util::SetTaxname(nps, "Influenza A virus (A/USA/RVD1_H1/2011(H1N1))");

    CRef<CSeq_entry> nuc = unit_test_util::GetNucleotideSequenceFromGoodNucProtSet(nps);
    AddTitle(nuc, "Influenza A virus (A/USA/RVD1_H1/2011(H1N1)) hemagglutinin (HA) gene, complete cds.");
    unit_test_util::SetNucProtSetProductName(nps, "hemagglutinin");

    CRef<CSeq_feat> ha_gene(new CSeq_feat());
    ha_gene->SetData().SetGene().SetLocus("HA");
    AddFeat(ha_gene, nuc);

    CheckDeflineMatches(nps, true);
}
897
898
// Test_GB_3359: an RNA sequence (biomol transcribed_RNA) carrying a single
// ncRNA feature with product "RmaA" and class "antisense_RNA"; the expected
// title is attached and checked with CheckDeflineMatches.
BOOST_AUTO_TEST_CASE(Test_GB_3359)
{
    CRef<CSeq_entry> entry = unit_test_util::BuildGoodSeq();
    unit_test_util::SetTaxname(entry, "Erwinia amylovora");
    entry->SetSeq().SetInst().SetMol(CSeq_inst::eMol_rna);
    unit_test_util::SetBiomol(entry, CMolInfo::eBiomol_transcribed_RNA);

    CRef<CSeq_feat> antisense = unit_test_util::BuildGoodFeat ();
    {
        CRNA_ref& rna = antisense->SetData().SetRna();
        rna.SetType(CRNA_ref::eType_ncRNA);
        rna.SetExt().SetGen().SetProduct("RmaA");
        rna.SetExt().SetGen().SetClass("antisense_RNA");
    }
    unit_test_util::AddFeat (antisense, entry);

    AddTitle(entry, "Erwinia amylovora RmaA antisense RNA, complete sequence.");
    CheckDeflineMatches(entry, true);
}
915
916
TestOneOrganelleSequenceDefline(CBioSource::TGenome genome,const string & defline)917 void TestOneOrganelleSequenceDefline(CBioSource::TGenome genome, const string& defline)
918 {
919 CRef<CSeq_entry> seq = unit_test_util::BuildGoodSeq();
920 unit_test_util::SetGenome(seq, genome);
921 AddTitle(seq, defline);
922 CheckDeflineMatches(seq, true, objects::CAutoDefOptions::eSequence);
923 }
924
925
// Test_SQD_1733: runs TestOneOrganelleSequenceDefline once per genome value
// listed below, each with its expected title.
BOOST_AUTO_TEST_CASE(Test_SQD_1733)
{
    static const struct {
        CBioSource::TGenome genome;
        const char*         title;
    } kCases[] = {
        { CBioSource::eGenome_unknown,       "Sebaea microphylla genomic sequence." },
        { CBioSource::eGenome_mitochondrion, "Sebaea microphylla mitochondrion sequence." },
        { CBioSource::eGenome_apicoplast,    "Sebaea microphylla apicoplast sequence." },
        { CBioSource::eGenome_chloroplast,   "Sebaea microphylla chloroplast sequence." },
        { CBioSource::eGenome_kinetoplast,   "Sebaea microphylla kinetoplast sequence." },
        { CBioSource::eGenome_leucoplast,    "Sebaea microphylla leucoplast sequence." }
    };

    for (size_t i = 0; i < sizeof(kCases) / sizeof(kCases[0]); ++i) {
        TestOneOrganelleSequenceDefline(kCases[i].genome, kCases[i].title);
    }
}
936
937
AddExon(CRef<CSeq_entry> seq,const string & number,TSeqPos start)938 void AddExon(CRef<CSeq_entry> seq, const string& number, TSeqPos start)
939 {
940 CRef<CSeq_feat> exon = unit_test_util::AddGoodImpFeat(seq, "exon");
941 exon->ResetComment();
942 exon->SetLocation().SetInt().SetFrom(start);
943 exon->SetLocation().SetInt().SetTo(start + 5);
944 if (!NStr::IsBlank(number)) {
945 CRef<CGb_qual> qual(new CGb_qual());
946 qual->SetQual("number");
947 qual->SetVal(number);
948 exon->SetQual().push_back(qual);
949 }
950 }
951
952
// Test_GB_3386: a CDS that is partial at its stop gets numbered exons added
// one at a time (offsets 0, 10 and 20 from the CDS start).  After each
// addition the matching expected title is attached and checked.
BOOST_AUTO_TEST_CASE(Test_GB_3386)
{
    CRef<CSeq_entry> nps = unit_test_util::BuildGoodNucProtSet();
    CRef<CSeq_entry> nuc = unit_test_util::GetNucleotideSequenceFromGoodNucProtSet(nps);
    CRef<CSeq_feat> cds = unit_test_util::GetCDSFromGoodNucProtSet (nps);
    cds->SetLocation().SetPartialStop(true, eExtreme_Biological);

    // expected title after exon 1, exons 1-2 and exons 1-3 respectively
    static const char* const kTitles[] = {
        "Sebaea microphylla fake protein name gene, exon 1 and partial cds.",
        "Sebaea microphylla fake protein name gene, exons 1 and 2 and partial cds.",
        "Sebaea microphylla fake protein name gene, exons 1 through 3 and partial cds."
    };

    for (TSeqPos i = 0; i < 3; ++i) {
        AddExon(nuc,
                NStr::NumericToString(i + 1),
                cds->GetLocation().GetStart(eExtreme_Positional) + 10 * i);
        AddTitle(nuc, kTitles[i]);
        CheckDeflineMatches(nps, true);
    }
}
975
976
// Test_GB_3410: a single miscRNA feature whose comment lists the elements it
// "contains".  Three variants are checked: complete feature, feature partial
// at both ends, and a comment that additionally lists 18S ribosomal RNA.
BOOST_AUTO_TEST_CASE(Test_GB_3410)
{
    CRef<CSeq_entry> entry = unit_test_util::BuildGoodSeq();
    CRef<objects::CSeq_feat> region = unit_test_util::AddMiscFeature(entry);
    region->SetData().SetRna().SetType(CRNA_ref::eType_miscRNA);

    // variant 1: complete feature
    region->SetComment("contains internal transcribed spacer 1, 5.8S ribosomal RNA, and internal transcribed spacer 2");
    AddTitle(entry, "Sebaea microphylla internal transcribed spacer 1, 5.8S ribosomal RNA gene, and internal transcribed spacer 2, complete sequence.");
    CheckDeflineMatches(entry, true);

    // variant 2: partial at both ends
    {
        CSeq_loc& loc = region->SetLocation();
        loc.SetPartialStart(true, eExtreme_Biological);
        loc.SetPartialStop(true, eExtreme_Biological);
    }
    AddTitle(entry, "Sebaea microphylla internal transcribed spacer 1, partial sequence; 5.8S ribosomal RNA gene, complete sequence; and internal transcribed spacer 2, partial sequence.");
    CheckDeflineMatches(entry, true);

    // variant 3: comment also names 18S ribosomal RNA
    region->SetComment("contains 18S ribosomal RNA, internal transcribed spacer 1, 5.8S ribosomal RNA, and internal transcribed spacer 2");
    AddTitle(entry, "Sebaea microphylla 18S ribosomal RNA gene, partial sequence; internal transcribed spacer 1 and 5.8S ribosomal RNA gene, complete sequence; and internal transcribed spacer 2, partial sequence.");
    CheckDeflineMatches(entry, true);
}
996
997
// Test_GB_3395: a lone "D-loop" import feature without a comment; the
// expected title is attached and checked.
BOOST_AUTO_TEST_CASE(Test_GB_3395)
{
    CRef<CSeq_entry> entry = unit_test_util::BuildGoodSeq();

    CRef<objects::CSeq_feat> d_loop = unit_test_util::AddGoodImpFeat (entry, "D-loop");
    d_loop->ResetComment();

    AddTitle(entry, "Sebaea microphylla D-loop, complete sequence.");
    CheckDeflineMatches(entry, true);
}
1006
1007
// Test_GB_3439: same D-loop setup as Test_GB_3395 but with the lower-case
// taxname "uncultured archaeon"; the expected title starts with a capital
// "Uncultured".
BOOST_AUTO_TEST_CASE(Test_GB_3439)
{
    CRef<CSeq_entry> entry = unit_test_util::BuildGoodSeq();
    unit_test_util::SetTaxname(entry, "uncultured archaeon");

    CRef<objects::CSeq_feat> d_loop = unit_test_util::AddGoodImpFeat (entry, "D-loop");
    d_loop->ResetComment();

    AddTitle(entry, "Uncultured archaeon D-loop, complete sequence.");
    CheckDeflineMatches(entry, true);
}
1018
1019
// Test_GB_3488: an rRNA feature that has neither a product name nor a
// comment; the expected title contains only "gene, complete sequence".
BOOST_AUTO_TEST_CASE(Test_GB_3488)
{
    CRef<CSeq_entry> entry = unit_test_util::BuildGoodSeq();
    unit_test_util::SetTaxname(entry, "Cypripedium japonicum");

    CRef<objects::CSeq_feat> unnamed_rrna = unit_test_util::AddMiscFeature(entry);
    unnamed_rrna->SetData().SetRna().SetType(CRNA_ref::eType_rRNA);
    unnamed_rrna->ResetComment();

    AddTitle(entry, "Cypripedium japonicum gene, complete sequence.");
    CheckDeflineMatches(entry, true);
}
1031
// Test_GB_3486: first a lone gene "fr9A" is checked; then a second misc
// feature commented as a biosynthetic gene cluster is added and the title is
// expected to name the cluster instead.
BOOST_AUTO_TEST_CASE(Test_GB_3486)
{
    CRef<CSeq_entry> entry = unit_test_util::BuildGoodSeq();
    unit_test_util::SetTaxname(entry, "Burkholderia sp. FERM BP-3421");

    // step 1: gene only
    CRef<objects::CSeq_feat> fr9a = unit_test_util::AddMiscFeature (entry);
    fr9a->ResetComment();
    fr9a->SetData().SetGene().SetLocus("fr9A");
    AddTitle(entry, "Burkholderia sp. FERM BP-3421 fr9A gene, complete sequence.");
    CheckDeflineMatches(entry, true);

    // step 2: gene plus gene-cluster misc feature
    CRef<objects::CSeq_feat> cluster = unit_test_util::AddMiscFeature(entry);
    cluster->SetComment("spliceostatin/FR901464 biosynthetic gene cluster");
    AddTitle(entry, "Burkholderia sp. FERM BP-3421 spliceostatin/FR901464 biosynthetic gene cluster, complete sequence.");
    CheckDeflineMatches(entry, true);
}
1049
1050
// Test_GB_3496: a complete CDS (gene matK, protein "maturase K") together
// with an intron that ends at the last base of the nucleotide sequence, is
// partial at both ends, and has its own gene trnK (description "tRNA-Lys").
BOOST_AUTO_TEST_CASE(Test_GB_3496)
{
    CRef<CSeq_entry> nps = unit_test_util::BuildGoodNucProtSet();

    // coding region side: gene matK / protein maturase K
    CRef<CSeq_feat> cds = unit_test_util::GetCDSFromGoodNucProtSet (nps);
    CRef<CSeq_feat> matk_gene = unit_test_util::MakeGeneForFeature (cds);
    matk_gene->SetData().SetGene().SetLocus("matK");
    CRef<CSeq_entry> nuc = unit_test_util::GetNucleotideSequenceFromGoodNucProtSet (nps);
    unit_test_util::AddFeat(matk_gene, nuc);
    CRef<CSeq_feat> prot_feat = unit_test_util::GetProtFeatFromGoodNucProtSet (nps);
    prot_feat->SetData().SetProt().SetName().front() = "maturase K";

    // intron side: partial intron with gene trnK
    CRef<CSeq_feat> intron = unit_test_util::AddGoodImpFeat (nuc, "intron");
    {
        CSeq_loc& loc = intron->SetLocation();
        loc.SetInt().SetTo(nuc->GetSeq().GetLength() - 1);
        loc.SetPartialStart(true, eExtreme_Biological);
        loc.SetPartialStop(true, eExtreme_Biological);
    }
    intron->SetPartial(true);
    CRef<CSeq_feat> trnk_gene = unit_test_util::MakeGeneForFeature (intron);
    {
        CGene_ref& gref = trnk_gene->SetData().SetGene();
        gref.SetLocus("trnK");
        gref.SetDesc("tRNA-Lys");
    }
    unit_test_util::AddFeat(trnk_gene, nuc);

    AddTitle(nuc, "Sebaea microphylla tRNA-Lys (trnK) gene, partial sequence; and maturase K (matK) gene, complete cds.");
    CheckDeflineMatches(nps, true);
}
1076
1077
// Test_GB_3458: two coding regions that start at the same position, each with
// its own gene (M1, M2).  The second CDS points at a protein Bioseq that has
// no protein feature, so it has no protein name.  The expected title lists
// both genes as separate genes rather than as alternatively spliced products.
BOOST_AUTO_TEST_CASE(Test_GB_3458)
{
    // if second coding region does not have protein name, should still not be considered alternatively spliced
    CRef<CSeq_entry> entry = unit_test_util::BuildGoodNucProtSet();
    CRef<CSeq_entry> nuc = unit_test_util::GetNucleotideSequenceFromGoodNucProtSet (entry);
    CRef<CSeq_feat> cds1 = unit_test_util::GetCDSFromGoodNucProtSet (entry);
    CRef<CSeq_feat> gene1 = unit_test_util::MakeGeneForFeature (cds1);
    gene1->SetData().SetGene().SetLocus("M1");
    unit_test_util::AddFeat(gene1, nuc);
    // second CDS: a misc feature converted to a coding region, starting where
    // cds1 starts and running to the last base of the nucleotide sequence
    CRef<CSeq_feat> cds2 = unit_test_util::AddMiscFeature(nuc);
    cds2->SetData().SetCdregion();
    cds2->ResetComment();
    cds2->SetLocation().SetInt().SetFrom(cds1->GetLocation().GetStart(eExtreme_Positional));
    cds2->SetLocation().SetInt().SetTo(nuc->GetSeq().GetInst().GetLength() - 1);
    // gene2 is created after cds2's location has been set
    CRef<CSeq_feat> gene2 = unit_test_util::MakeGeneForFeature (cds2);
    gene2->SetData().SetGene().SetLocus("M2");
    unit_test_util::AddFeat(gene2, nuc);
    // make protein for second coding region, with no protein feature
    CRef<CSeq_entry> pentry(new CSeq_entry());
    pentry->SetSeq().SetInst().SetMol(objects::CSeq_inst::eMol_aa);
    pentry->SetSeq().SetInst().SetRepr(objects::CSeq_inst::eRepr_raw);
    pentry->SetSeq().SetInst().SetSeq_data().SetIupacaa().Set("MPRKTEIN");
    pentry->SetSeq().SetInst().SetLength(8);

    // local id "prot2" ties the new protein to cds2's product
    CRef<objects::CSeq_id> pid(new objects::CSeq_id());
    pid->SetLocal().SetStr ("prot2");
    pentry->SetSeq().SetId().push_back(pid);
    entry->SetSet().SetSeq_set().push_back(pentry);
    cds2->SetProduct().SetWhole().SetLocal().SetStr("prot2");

    AddTitle(nuc, "Sebaea microphylla M2 and fake protein name (M1) genes, complete cds.");
    CheckDeflineMatches(entry, true);
}
1111
1112
// Test_GB_3679: a CDS with gene M1 plus a mobile_element feature of type
// "integron:class I" that spans the whole nucleotide sequence.  The expected
// title names the class I integron ahead of the gene.
BOOST_AUTO_TEST_CASE(Test_GB_3679)
{
    // coding region with gene M1
    CRef<CSeq_entry> entry = unit_test_util::BuildGoodNucProtSet();
    CRef<CSeq_entry> nuc = unit_test_util::GetNucleotideSequenceFromGoodNucProtSet (entry);
    CRef<CSeq_feat> cds1 = unit_test_util::GetCDSFromGoodNucProtSet (entry);
    CRef<CSeq_feat> gene1 = unit_test_util::MakeGeneForFeature (cds1);
    gene1->SetData().SetGene().SetLocus("M1");
    unit_test_util::AddFeat(gene1, nuc);

    // mobile element covering positions 0 .. length-1 of the nucleotide
    CRef<CSeq_feat> integron = unit_test_util::AddMiscFeature(nuc);
    integron->SetData().SetImp().SetKey("mobile_element");
    CRef<CGb_qual> q(new CGb_qual());
    q->SetQual("mobile_element_type");
    q->SetVal("integron:class I");
    integron->SetQual().push_back(q);
    integron->SetLocation().SetInt().SetFrom(0);
    integron->SetLocation().SetInt().SetTo(nuc->GetSeq().GetLength() - 1);


    AddTitle(nuc, "Sebaea microphylla class I integron fake protein name (M1) gene, complete cds.");
    CheckDeflineMatches(entry, true);
}
1136
1137
// Test_GB_3848: the gene for the CDS has locus "gltB" and allele "16"; the
// expected title includes the "gltB-16 allele" clause.
BOOST_AUTO_TEST_CASE(Test_GB_3848)
{
    CRef<CSeq_entry> nps = unit_test_util::BuildGoodNucProtSet();
    CRef<CSeq_entry> nuc = unit_test_util::GetNucleotideSequenceFromGoodNucProtSet (nps);
    CRef<CSeq_feat> cds = unit_test_util::GetCDSFromGoodNucProtSet (nps);

    CRef<CSeq_feat> gltb_gene = unit_test_util::MakeGeneForFeature (cds);
    {
        CGene_ref& gref = gltb_gene->SetData().SetGene();
        gref.SetLocus("gltB");
        gref.SetAllele("16");
    }
    unit_test_util::AddFeat(gltb_gene, nuc);

    AddTitle(nuc, "Sebaea microphylla fake protein name (gltB) gene, gltB-16 allele, complete cds.");
    CheckDeflineMatches(nps, true);
}
1151
1152
// Test_SQD_2075: a misc feature, partial at both ends, whose comment reads
// "contains tRNA-Pro and control region"; the expected title is attached and
// checked.
BOOST_AUTO_TEST_CASE(Test_SQD_2075)
{
    CRef<CSeq_entry> entry = unit_test_util::BuildGoodSeq();

    CRef<objects::CSeq_feat> region = unit_test_util::AddMiscFeature(entry);
    region->SetComment("contains tRNA-Pro and control region");
    {
        CSeq_loc& loc = region->SetLocation();
        loc.SetPartialStart(true, eExtreme_Biological);
        loc.SetPartialStop(true, eExtreme_Biological);
    }

    AddTitle(entry, "Sebaea microphylla tRNA-Pro gene and control region, partial sequence.");
    CheckDeflineMatches(entry, true);
}
1163
1164
// Test_SQD_2115: a "regulatory" feature with regulatory_class "promoter" is
// checked on its own, then again after a gene "chs" made from that feature
// has been added.
BOOST_AUTO_TEST_CASE(Test_SQD_2115)
{
    CRef<CSeq_entry> entry = unit_test_util::BuildGoodSeq();

    // step 1: promoter alone
    CRef<objects::CSeq_feat> promoter = unit_test_util::AddMiscFeature(entry);
    promoter->ResetComment();
    promoter->SetData().SetImp().SetKey("regulatory");
    CRef<CGb_qual> reg_class(new CGb_qual("regulatory_class", "promoter"));
    promoter->SetQual().push_back(reg_class);
    AddTitle(entry, "Sebaea microphylla promoter region.");
    CheckDeflineMatches(entry, true);

    // step 2: promoter with gene "chs"
    CRef<CSeq_feat> chs_gene = unit_test_util::MakeGeneForFeature (promoter);
    chs_gene->SetData().SetGene().SetLocus("chs");
    unit_test_util::AddFeat(chs_gene, entry);
    AddTitle(entry, "Sebaea microphylla chs gene, promoter region.");
    CheckDeflineMatches(entry, true);
}
1186
1187
// Test_GB_3866: three adjacent features -- a 5'-partial miscRNA "containing"
// 18S rRNA and ITS1 (0..15), a 5.8S rRNA (16..19) and a 3'-partial miscRNA
// "containing" ITS2 and 28S rRNA (20..35).  The expected title combines the
// elements from all three features.
BOOST_AUTO_TEST_CASE(Test_GB_3866)
{
    CRef<CSeq_entry> seq = unit_test_util::BuildGoodSeq();
    // first misc RNA: 0..15, partial at the biological start
    CRef<objects::CSeq_feat> misc1 = unit_test_util::AddMiscFeature(seq);
    misc1->SetData().SetRna().SetType(CRNA_ref::eType_miscRNA);
    misc1->SetComment("contains 18S ribosomal RNA and internal transcribed spacer 1");
    misc1->SetLocation().SetInt().SetFrom(0);
    misc1->SetLocation().SetInt().SetTo(15);
    misc1->SetLocation().SetPartialStart(true, eExtreme_Biological);

    // named rRNA in the middle: 16..19
    CRef<objects::CSeq_feat> rna = unit_test_util::AddMiscFeature(seq);
    rna->SetData().SetRna().SetType(CRNA_ref::eType_rRNA);
    rna->SetData().SetRna().SetExt().SetName("5.8S ribosomal RNA");
    rna->SetLocation().SetInt().SetFrom(16);
    rna->SetLocation().SetInt().SetTo(19);

    // second misc RNA: 20..35, partial at the biological stop
    CRef<objects::CSeq_feat> misc2 = unit_test_util::AddMiscFeature(seq);
    misc2->SetData().SetRna().SetType(CRNA_ref::eType_miscRNA);
    misc2->SetComment("contains internal transcribed spacer 2 and 28S ribosomal RNA");
    misc2->SetLocation().SetInt().SetFrom(20);
    misc2->SetLocation().SetInt().SetTo(35);
    misc2->SetLocation().SetPartialStop(true, eExtreme_Biological);

    // The literal below uses backslash line continuations; any leading
    // whitespace on a continuation line would become part of the string.
    AddTitle(seq, "Sebaea microphylla 18S ribosomal RNA gene, partial \
sequence; internal transcribed spacer 1, 5.8S ribosomal RNA gene, and \
internal transcribed spacer 2, complete sequence; and 28S ribosomal RNA \
gene, partial sequence.");
    CheckDeflineMatches(seq, true);
}
1217
1218
// Test_SQD_2118: one misc feature (0..15, partial at both ends) whose comment
// lists three elements: tRNA-Thr, tRNA-Pro and control region.  The expected
// title marks the first and last element partial and the middle one complete.
BOOST_AUTO_TEST_CASE(Test_SQD_2118)
{
    CRef<CSeq_entry> seq = unit_test_util::BuildGoodSeq();
    CRef<objects::CSeq_feat> misc1 = unit_test_util::AddMiscFeature(seq);
    misc1->SetComment("contains tRNA-Thr, tRNA-Pro, and control region");
    misc1->SetLocation().SetInt().SetFrom(0);
    misc1->SetLocation().SetInt().SetTo(15);
    misc1->SetLocation().SetPartialStart(true, eExtreme_Biological);
    misc1->SetLocation().SetPartialStop(true, eExtreme_Biological);

    // The literal below uses a backslash line continuation; any leading
    // whitespace on the continuation line would become part of the string.
    AddTitle(seq, "Sebaea microphylla tRNA-Thr gene, partial sequence; \
tRNA-Pro gene, complete sequence; and control region, partial sequence.");
    CheckDeflineMatches(seq, true);

}
1234
1235
// Test_GB_1851: a single misc feature (0..15, partial at both ends) is
// checked under three misc-feature rules (eDelete, eNoncodingProductFeat,
// eCommentFeat) with a "nonfunctional ... due to ..." comment, and once more
// under eNoncodingProductFeat with a "similar to ..." comment.
BOOST_AUTO_TEST_CASE(Test_GB_1851)
{
    CRef<CSeq_entry> entry = unit_test_util::BuildGoodSeq();
    CRef<objects::CSeq_feat> feat = unit_test_util::AddMiscFeature(entry);
    feat->SetComment("nonfunctional xyz due to argle");
    {
        CSeq_loc& loc = feat->SetLocation();
        loc.SetInt().SetFrom(0);
        loc.SetInt().SetTo(15);
        loc.SetPartialStart(true, eExtreme_Biological);
        loc.SetPartialStop(true, eExtreme_Biological);
    }

    // the misc_feature is the only feature, so it is kept even under eDelete
    const string gene_title = "Sebaea microphylla nonfunctional xyz gene, partial sequence.";
    AddTitle(entry, gene_title);
    CheckDeflineMatches(entry, true, CAutoDefOptions::eListAllFeatures, CAutoDefOptions::eDelete);

    AddTitle(entry, gene_title);
    CheckDeflineMatches(entry, true, CAutoDefOptions::eListAllFeatures, CAutoDefOptions::eNoncodingProductFeat);

    AddTitle(entry, "Sebaea microphylla nonfunctional xyz due to argle genomic sequence.");
    CheckDeflineMatches(entry, true, CAutoDefOptions::eListAllFeatures, CAutoDefOptions::eCommentFeat);

    // "similar to" comment
    feat->SetComment("similar to xyz");
    AddTitle(entry, "Sebaea microphylla xyz-like gene, partial sequence.");
    CheckDeflineMatches(entry, true, CAutoDefOptions::eListAllFeatures, CAutoDefOptions::eNoncodingProductFeat);
}
1260
1261
s_SetProteinName(CRef<CSeq_entry> prot,const string & name)1262 void s_SetProteinName(CRef<CSeq_entry> prot, const string& name)
1263 {
1264 prot->SetSeq().SetAnnot().front()->SetData().SetFtable().front()->SetData().SetProt().SetName().front() = name;
1265 }
1266
1267
s_AddCDS(CRef<CSeq_entry> np,const string & name,TSeqPos from,TSeqPos to)1268 CRef<CSeq_feat> s_AddCDS(CRef<CSeq_entry> np, const string& name, TSeqPos from, TSeqPos to)
1269 {
1270 CRef<CSeq_entry> prev_prot = np->SetSet().SetSeq_set().back();
1271 CRef<CSeq_entry> new_prot (new CSeq_entry());
1272 new_prot->Assign(*prev_prot);
1273 CRef<CSeq_id> new_id(new CSeq_id());
1274 new_id->Assign(*(prev_prot->GetSeq().GetId().front()));
1275 size_t pos = NStr::Find(new_id->GetLocal().GetStr(), "_");
1276 string prefix = new_id->GetLocal().GetStr().substr(0, pos+ 1);
1277 string suffix = new_id->GetLocal().GetStr().substr(pos + 1);
1278 int prev_offset = NStr::StringToInt(suffix);
1279 new_id->SetLocal().SetStr(prefix + NStr::NumericToString(prev_offset + 1));
1280 unit_test_util::ChangeId(new_prot, new_id);
1281 s_SetProteinName(new_prot, name);
1282 np->SetSet().SetSeq_set().push_back(new_prot);
1283
1284 CRef<CSeq_feat> prev_cds = np->SetSet().SetAnnot().front()->SetData().SetFtable().back();
1285 CRef<CSeq_feat> new_cds(new CSeq_feat());
1286 new_cds->Assign(*prev_cds);
1287 new_cds->SetProduct().SetWhole().Assign(*new_id);
1288 new_cds->SetLocation().SetInt().SetFrom(from);
1289 new_cds->SetLocation().SetInt().SetTo(to);
1290 np->SetSet().SetAnnot().front()->SetData().SetFtable().push_back(new_cds);
1291 return new_cds;
1292 }
1293
1294
// Test_GB_3942: three coding regions on one nucleotide.  First the two added
// CDSs sit on overlapping single intervals and the title is expected to list
// them as separate genes.  Then both are moved onto mix locations that differ
// only in the end of the last interval, and the title is expected to describe
// them as alternatively spliced.
BOOST_AUTO_TEST_CASE(Test_GB_3942)
{
    CRef<CSeq_entry> entry = unit_test_util::BuildGoodNucProtSet();
    CRef<CSeq_entry> nuc = unit_test_util::GetNucleotideSequenceFromGoodNucProtSet (entry);
    CRef<CSeq_entry> prot1 = unit_test_util::GetProteinSequenceFromGoodNucProtSet(entry);
    CRef<CSeq_feat> cds1 = unit_test_util::GetCDSFromGoodNucProtSet (entry);

    // give the first protein an id ending in "_1"; s_AddCDS derives the ids of
    // the proteins it adds from the number after the "_"
    unit_test_util::ChangeId(prot1, "_1");
    cds1->SetLocation().SetInt().SetFrom(0);
    cds1->SetLocation().SetInt().SetTo(5);
    cds1->SetProduct().SetWhole().Assign(*(prot1->GetSeq().GetId().front()));
    s_SetProteinName(prot1, "RNA-dependent RNA polymerase");

    // two more CDSs; their intervals (10..25 and 12..20) overlap
    CRef<CSeq_feat> cds2 = s_AddCDS(entry, "Coat protein", 10, 25);
    CRef<CSeq_feat> cds3 = s_AddCDS(entry, "Movement protein", 12, 20);

    // set only after s_AddCDS has copied cds1's location into cds2/cds3
    cds1->SetLocation().SetPartialStart(true, eExtreme_Biological);

    AddTitle(nuc, "Sebaea microphylla RNA-dependent RNA polymerase gene, partial cds; and Coat protein and Movement protein genes, complete cds.");
    CheckDeflineMatches(entry, true);


    // actual splicing
    // cds3 gets the same mix location as cds2, except that its last interval
    // ends two bases later
    cds2->SetLocation().Assign(*(unit_test_util::MakeMixLoc(nuc->GetSeq().GetId().front())));
    cds3->SetLocation().Assign(cds2->GetLocation());
    TSeqPos old_end = cds3->GetLocation().GetMix().Get().back()->GetInt().GetTo();
    cds3->SetLocation().SetMix().Set().back()->SetInt().SetTo(old_end + 2);

    AddTitle(nuc, "Sebaea microphylla protein gene, complete cds, alternatively spliced; and RNA-dependent RNA polymerase gene, partial cds.");
    CheckDeflineMatches(entry, true);

}
1327
1328
// Test_GB_8927: same three-CDS layout with mix locations as the second half
// of Test_GB_3942, but the source division is set to "VRL".  The expected
// title then lists the two proteins by name instead of using the
// "alternatively spliced" wording.
BOOST_AUTO_TEST_CASE(Test_GB_8927)
{
    CRef<CSeq_entry> entry = unit_test_util::BuildGoodNucProtSet();
    CRef<CSeq_entry> nuc = unit_test_util::GetNucleotideSequenceFromGoodNucProtSet(entry);
    CRef<CSeq_entry> prot1 = unit_test_util::GetProteinSequenceFromGoodNucProtSet(entry);
    CRef<CSeq_feat> cds1 = unit_test_util::GetCDSFromGoodNucProtSet(entry);

    // give the first protein an id ending in "_1"; s_AddCDS derives the ids of
    // the proteins it adds from the number after the "_"
    unit_test_util::ChangeId(prot1, "_1");
    cds1->SetLocation().SetInt().SetFrom(0);
    cds1->SetLocation().SetInt().SetTo(5);
    cds1->SetProduct().SetWhole().Assign(*(prot1->GetSeq().GetId().front()));
    s_SetProteinName(prot1, "RNA-dependent RNA polymerase");

    CRef<CSeq_feat> cds2 = s_AddCDS(entry, "Coat protein", 10, 25);
    CRef<CSeq_feat> cds3 = s_AddCDS(entry, "Movement protein", 12, 20);

    // set only after s_AddCDS has copied cds1's location into cds2/cds3
    cds1->SetLocation().SetPartialStart(true, eExtreme_Biological);
    // cds3 gets the same mix location as cds2, except that its last interval
    // ends two bases later
    cds2->SetLocation().Assign(*(unit_test_util::MakeMixLoc(nuc->GetSeq().GetId().front())));
    cds3->SetLocation().Assign(cds2->GetLocation());
    TSeqPos old_end = cds3->GetLocation().GetMix().Get().back()->GetInt().GetTo();
    cds3->SetLocation().SetMix().Set().back()->SetInt().SetTo(old_end + 2);

    // the only difference from the spliced case in Test_GB_3942
    unit_test_util::SetDiv(entry, "VRL");

    AddTitle(nuc, "Sebaea microphylla Movement protein and Coat protein genes, complete cds; and RNA-dependent RNA polymerase gene, partial cds.");
    CheckDeflineMatches(entry, true);
}
1356
1357
// Test_GB_3926: a partial 28S rRNA on a source that carries isolate "JU6"
// and clone "1".  CheckDeflineMatches is called with the clone subsource and
// isolate orgmod requested; the expected title shows both.
BOOST_AUTO_TEST_CASE(Test_GB_3926)
{
    CRef<CSeq_entry> entry = unit_test_util::BuildGoodSeq();

    CRef<objects::CSeq_feat> rrna = unit_test_util::AddMiscFeature(entry);
    rrna->ResetComment();
    {
        CRNA_ref& rna = rrna->SetData().SetRna();
        rna.SetType(CRNA_ref::eType_rRNA);
        rna.SetExt().SetName("28S ribosomal RNA");
    }
    {
        CSeq_loc& loc = rrna->SetLocation();
        loc.SetPartialStart(true, eExtreme_Biological);
        loc.SetPartialStop(true, eExtreme_Biological);
    }

    unit_test_util::SetOrgMod(entry, COrgMod::eSubtype_isolate, "JU6");
    unit_test_util::SetSubSource(entry, CSubSource::eSubtype_clone, "1");

    AddTitle(entry, "Sebaea microphylla isolate JU6 clone 1 28S ribosomal RNA gene, partial sequence.");

    // modifiers to request
    vector<CSubSource::ESubtype> subsrcs(1, CSubSource::eSubtype_clone);
    vector<COrgMod::ESubtype> orgmods(1, COrgMod::eSubtype_isolate);

    CheckDeflineMatches(entry, subsrcs, orgmods);
}
1379
1380
// Test_SQD_2181: a CDS plus a "regulatory" feature of class "promoter" on the
// same nucleotide.  Autodef is configured by hand (list all features, delete
// misc features, use fake promoters) and the result is checked against the
// attached title.
BOOST_AUTO_TEST_CASE(Test_SQD_2181)
{
    CRef<CSeq_entry> nps = unit_test_util::BuildGoodNucProtSet();
    CRef<CSeq_entry> nuc = unit_test_util::GetNucleotideSequenceFromGoodNucProtSet (nps);
    CRef<CSeq_feat> cds1 = unit_test_util::GetCDSFromGoodNucProtSet (nps);

    // promoter feature
    CRef<objects::CSeq_feat> promoter = unit_test_util::AddMiscFeature(nuc);
    promoter->ResetComment();
    promoter->SetData().SetImp().SetKey("regulatory");
    CRef<CGb_qual> reg_class(new CGb_qual("regulatory_class", "promoter"));
    promoter->SetQual().push_back(reg_class);

    AddTitle(nuc, "Sebaea microphylla fake protein name gene, promoter region and complete cds.");

    // put the entry into a fresh scope
    CRef<CObjectManager> objmgr = CObjectManager::GetInstance();
    CRef<CScope> scope(new CScope(*objmgr));
    CSeq_entry_Handle seh = scope->AddTopLevelSeqEntry (*nps);

    // configure autodef explicitly
    objects::CAutoDefWithTaxonomy autodef;
    autodef.AddSources (seh);

    CRef<CAutoDefModifierCombo> mod_combo(new CAutoDefModifierCombo ());

    autodef.SetFeatureListType(CAutoDefOptions::eListAllFeatures);
    autodef.SetMiscFeatRule(CAutoDefOptions::eDelete);
    autodef.SetUseFakePromoters(true);

    CheckDeflineMatches(seh, autodef, mod_combo);
}
1415
1416
// Test_GB_3949: the source has culture_collection "ATCC:12345" and that
// orgmod is requested; the expected title shows it as
// "culture ATCC:12345".
BOOST_AUTO_TEST_CASE(Test_GB_3949)
{
    CRef<CSeq_entry> nps = unit_test_util::BuildGoodNucProtSet();
    unit_test_util::SetOrgMod(nps, COrgMod::eSubtype_culture_collection, "ATCC:12345");

    CRef<CSeq_entry> nuc = unit_test_util::GetNucleotideSequenceFromGoodNucProtSet (nps);
    AddTitle(nuc, "Sebaea microphylla culture ATCC:12345 fake protein name gene, complete cds.");

    // no subsources requested, one orgmod requested
    vector<CSubSource::ESubtype> subsrcs;
    vector<COrgMod::ESubtype> orgmods(1, COrgMod::eSubtype_culture_collection);

    CheckDeflineMatches(nps, subsrcs, orgmods);
}
1431
// Test_GB_4043: a 5'-partial CDS starting at 20, preceded by a 5'-partial
// intron (0..19, number 2), both under a 5'-partial gene "GAPDH" that runs
// from 0 to the end of the CDS.  Autodef is run with SetKeepIntrons(true)
// and the modifier combo returned by FindBestModifierCombo.
BOOST_AUTO_TEST_CASE(Test_GB_4043)
{
    CRef<CSeq_entry> nps = unit_test_util::BuildGoodNucProtSet();
    CRef<CSeq_feat> cds = unit_test_util::GetCDSFromGoodNucProtSet (nps);
    CRef<CSeq_entry> nuc = unit_test_util::GetNucleotideSequenceFromGoodNucProtSet (nps);

    // coding region
    {
        CSeq_loc& loc = cds->SetLocation();
        loc.SetInt().SetFrom(20);
        loc.SetPartialStart(true, eExtreme_Biological);
    }

    // intron 2
    CRef<objects::CSeq_feat> intron = unit_test_util::AddMiscFeature(nuc);
    intron->SetData().SetImp().SetKey("intron");
    {
        CSeq_loc& loc = intron->SetLocation();
        loc.SetInt().SetFrom(0);
        loc.SetInt().SetTo(19);
        loc.SetPartialStart(true, eExtreme_Biological);
    }
    intron->ResetComment();
    CRef<CGb_qual> intron_number(new CGb_qual("number", "2"));
    intron->SetQual().push_back(intron_number);

    // gene spanning intron and CDS
    CRef<objects::CSeq_feat> gapdh = unit_test_util::AddMiscFeature(nuc);
    gapdh->SetData().SetGene().SetLocus("GAPDH");
    {
        CSeq_loc& loc = gapdh->SetLocation();
        loc.SetInt().SetFrom(0);
        loc.SetInt().SetTo(cds->GetLocation().GetInt().GetTo());
        loc.SetPartialStart(true, eExtreme_Biological);
    }
    gapdh->ResetComment();

    AddTitle(nuc, "Sebaea microphylla fake protein name (GAPDH) gene, intron 2 and partial cds.");

    // put the entry into a fresh scope
    CRef<CObjectManager> objmgr = CObjectManager::GetInstance();
    CRef<CScope> scope(new CScope(*objmgr));
    CSeq_entry_Handle seh = scope->AddTopLevelSeqEntry (*nps);

    objects::CAutoDefWithTaxonomy autodef;
    autodef.AddSources (seh);
    autodef.SetKeepIntrons(true);

    CRef<CAutoDefModifierCombo> mod_combo = autodef.FindBestModifierCombo();

    CheckDeflineMatches(seh, autodef, mod_combo);
}
1471
1472
// Test_GB_4078: a 5'-partial CDS (gene "G") and a 3'-partial misc feature
// commented "G-L intergenic spacer" that starts at the CDS start and covers
// three bases.  The expected title is checked first on the default nucleotide
// and again after the nucleotide has been switched to RNA with biomol cRNA
// (the same title is still attached for the second check).
BOOST_AUTO_TEST_CASE(Test_GB_4078)
{
    CRef<CSeq_entry> entry = unit_test_util::BuildGoodNucProtSet();
    CRef<CSeq_feat> cds = unit_test_util::GetCDSFromGoodNucProtSet (entry);
    CRef<CSeq_entry> nuc = unit_test_util::GetNucleotideSequenceFromGoodNucProtSet (entry);
    cds->SetLocation().SetPartialStart(true, eExtreme_Biological);

    CRef<objects::CSeq_feat> spacer = unit_test_util::AddMiscFeature(nuc);
    spacer->SetComment("G-L intergenic spacer");
    // Querying the CDS start is a read-only operation, so go through the
    // const accessor GetLocation() instead of the mutating SetLocation().
    const TSeqPos cds_start = cds->GetLocation().GetStart(eExtreme_Biological);
    spacer->SetLocation().SetInt().SetFrom(cds_start);
    spacer->SetLocation().SetInt().SetTo(cds_start + 2);
    spacer->SetLocation().SetPartialStop(true, eExtreme_Biological);

    CRef<objects::CSeq_feat> gene = unit_test_util::MakeGeneForFeature(cds);
    gene->SetData().SetGene().SetLocus("G");
    unit_test_util::AddFeat(gene, nuc);

    AddTitle(nuc, "Sebaea microphylla fake protein name (G) gene, partial cds; and G-L intergenic spacer, partial sequence.");

    CheckDeflineMatches(entry);

    // repeat the check with the nucleotide turned into a cRNA molecule
    unit_test_util::SetBiomol(nuc, CMolInfo::eBiomol_cRNA);
    nuc->SetSeq().SetInst().SetMol(CSeq_inst::eMol_rna);

    CheckDeflineMatches(entry);
}
1500
// Test_SQD_2370: a misc feature whose comment already ends in
// "intergenic spacer region"; the expected title repeats the comment and adds
// ", complete sequence."
BOOST_AUTO_TEST_CASE(Test_SQD_2370)
{
    CRef<CSeq_entry> entry = unit_test_util::BuildGoodSeq();

    CRef<objects::CSeq_feat> spacer = unit_test_util::AddMiscFeature(entry);
    spacer->SetComment("atpB-rbcL intergenic spacer region");

    AddTitle(entry, "Sebaea microphylla atpB-rbcL intergenic spacer region, complete sequence.");
    CheckDeflineMatches(entry);
}
1511
// Test_GB_4242: the isolate value "FPZSP37" is a prefix of the last word of
// the taxname "Trichoderma sp. FPZSP372".  With the default check the isolate
// is expected to be left out of the title.  The second part builds a modifier
// combo by hand with SetAllowModAtEndOfTaxname(true) and
// SetExcludeSpOrgs(false) and expects the isolate to appear.
BOOST_AUTO_TEST_CASE(Test_GB_4242)
{
    CRef<CSeq_entry> entry = unit_test_util::BuildGoodSeq();
    unit_test_util::SetTaxname(entry, "Trichoderma sp. FPZSP372");
    unit_test_util::SetOrgMod(entry, COrgMod::eSubtype_isolate, "FPZSP37");
    AddTitle(entry, "Trichoderma sp. FPZSP372 sequence.");

    vector<CSubSource::ESubtype> subsrcs;
    vector<COrgMod::ESubtype> orgmods(1, COrgMod::eSubtype_isolate);

    CheckDeflineMatches(entry, subsrcs, orgmods);

    // Try again, but deliberately allow modifier that includes taxname to be included
    AddTitle(entry, "Trichoderma sp. FPZSP372 isolate FPZSP37 sequence.");

    CRef<CObjectManager> objmgr = CObjectManager::GetInstance();
    CRef<CScope> scope(new CScope(*objmgr));
    CSeq_entry_Handle seh = scope->AddTopLevelSeqEntry(*entry);

    objects::CAutoDefWithTaxonomy autodef;
    autodef.AddSources(seh);

    // hand-built modifier combo
    CRef<CAutoDefModifierCombo> mod_combo(new CAutoDefModifierCombo());
    mod_combo->SetUseModifierLabels(true);
    mod_combo->SetAllowModAtEndOfTaxname(true);
    mod_combo->SetExcludeSpOrgs(false);
    for (vector<CSubSource::ESubtype>::const_iterator sub = subsrcs.begin();
         sub != subsrcs.end();  ++sub) {
        mod_combo->AddSubsource(*sub, true);
    }
    for (vector<COrgMod::ESubtype>::const_iterator mod = orgmods.begin();
         mod != orgmods.end();  ++mod) {
        mod_combo->AddOrgMod(*mod, true);
    }

    autodef.SetFeatureListType(CAutoDefOptions::eListAllFeatures);
    autodef.SetMiscFeatRule(CAutoDefOptions::eDelete);

    CheckDeflineMatches(seh, autodef, mod_combo);
}
1554
// Test_SQD_3440: options initialised from a default modifier combo are turned
// into a user object, whose object type must be eObjectType_AutodefOptions.
// After SetUseLabels() a new user object is made and compared with the
// options through CheckAutoDefOptions.
BOOST_AUTO_TEST_CASE(Test_SQD_3440)
{
    CAutoDefOptions options;
    CAutoDefModifierCombo combo;
    combo.InitOptions(options);

    CRef<CUser_object> user_obj = options.MakeUserObject();
    BOOST_CHECK_EQUAL(user_obj->GetObjectType(), CUser_object::eObjectType_AutodefOptions);

    // rebuild the user object after changing an option
    options.SetUseLabels();
    user_obj = options.MakeUserObject();
    CheckAutoDefOptions(*user_obj, options);
}
1567
1568
// Test_RemovableuORF: a nuc-prot set whose product is named "uORF".
//  1. Alone, the uORF is expected in the title.
//  2. With a satellite repeat_region added, the title is expected to name
//     only the satellite.
//  3. With SetKeepuORFs(true) on a hand-configured autodef, the title is
//     expected to name both.
BOOST_AUTO_TEST_CASE(Test_RemovableuORF)
{
    CRef<CSeq_entry> nps = BuildNucProtSet("uORF");
    CRef<CSeqdesc> desc = AddSource(nps, "Alcanivorax sp. HA03");
    CRef<CSeq_entry> nuc = unit_test_util::GetNucleotideSequenceFromGoodNucProtSet(nps);

    // 1. uORF is the only feature
    AddTitle(nuc, "Alcanivorax sp. HA03 uORF gene, complete cds.");
    CheckDeflineMatches(nps);

    // 2. try again, with another feature present, so uORF isn't lonely
    CRef<objects::CSeq_feat> satellite = unit_test_util::AddMiscFeature(nuc);
    satellite->SetData().SetImp().SetKey("repeat_region");
    CRef<CGb_qual> sat_qual(new CGb_qual("satellite", "x"));
    satellite->SetQual().push_back(sat_qual);
    AddTitle(nuc, "Alcanivorax sp. HA03 satellite x sequence.");
    CheckDeflineMatches(nps);

    // 3. try again, but set keepORFs flag
    CRef<CObjectManager> objmgr = CObjectManager::GetInstance();
    CRef<CScope> scope(new CScope(*objmgr));
    CSeq_entry_Handle seh = scope->AddTopLevelSeqEntry(*nps);

    objects::CAutoDefWithTaxonomy autodef;
    autodef.AddSources(seh);

    CRef<CAutoDefModifierCombo> mod_combo(new CAutoDefModifierCombo());

    autodef.SetFeatureListType(CAutoDefOptions::eListAllFeatures);
    autodef.SetMiscFeatRule(CAutoDefOptions::eDelete);
    autodef.SetKeepuORFs(true);

    AddTitle(nuc, "Alcanivorax sp. HA03 uORF gene, complete cds; and satellite x sequence.");
    CheckDeflineMatches(seh, autodef, mod_combo);
}
1608
// Exercises handling of mobile_element features: an optional element
// (SINE) alone, alongside another feature, alongside another feature with
// KeepOptionalMobileElements on, and finally a transposon with the flag off.
BOOST_AUTO_TEST_CASE(Test_RemovableMobileElement)
{
    // 1) a SINE mobile element as the only feature
    CRef<CSeq_entry> entry = unit_test_util::BuildGoodSeq();
    CRef<objects::CSeq_feat> mob_el = unit_test_util::AddMiscFeature(entry);
    mob_el->SetData().SetImp().SetKey("mobile_element");
    // kept in a named CRef because the qualifier value is changed in step 4
    CRef<CGb_qual> met(new CGb_qual("mobile_element_type", "SINE:x"));
    mob_el->SetQual().push_back(met);
    AddTitle(entry, "Sebaea microphylla SINE x, complete sequence.");
    CheckDeflineMatches(entry);

    // 2) add a satellite repeat_region at [0..10] and move the mobile
    //    element to [15..20]; the expected title lists only the satellite
    CRef<objects::CSeq_feat> misc = unit_test_util::AddMiscFeature(entry);
    misc->SetData().SetImp().SetKey("repeat_region");
    misc->SetQual().push_back(CRef<CGb_qual>(new CGb_qual("satellite", "y")));
    auto& satellite_int = misc->SetLocation().SetInt();
    satellite_int.SetFrom(0);
    satellite_int.SetTo(10);
    auto& element_int = mob_el->SetLocation().SetInt();
    element_int.SetFrom(15);
    element_int.SetTo(20);
    AddTitle(entry, "Sebaea microphylla satellite y sequence.");
    CheckDeflineMatches(entry);

    // 3) same data, explicit autodef with KeepOptionalMobileElements enabled
    CRef<CObjectManager> object_manager = CObjectManager::GetInstance();
    CRef<CScope> scope(new CScope(*object_manager));
    CSeq_entry_Handle seh = scope->AddTopLevelSeqEntry(*entry);

    objects::CAutoDefWithTaxonomy autodef;
    autodef.AddSources(seh);

    CRef<CAutoDefModifierCombo> mod_combo(new CAutoDefModifierCombo());

    autodef.SetFeatureListType(CAutoDefOptions::eListAllFeatures);
    autodef.SetMiscFeatRule(CAutoDefOptions::eDelete);
    autodef.SetKeepOptionalMobileElements(true);

    AddTitle(entry, "Sebaea microphylla satellite y sequence; and SINE x, complete sequence.");
    CheckDeflineMatches(seh, autodef, mod_combo);

    // 4) a transposon is expected in the title even with the flag cleared
    met->SetVal("transposon:z");
    autodef.SetKeepOptionalMobileElements(false);
    AddTitle(entry, "Sebaea microphylla satellite y sequence; and transposon z, complete sequence.");
    CheckDeflineMatches(seh, autodef, mod_combo);
}
1661
// GB-5272: a gene that has only a locus_tag (no locus) is expected to show
// the locus_tag in parentheses after the product name.
BOOST_AUTO_TEST_CASE(GB_5272)
{
    CRef<CSeq_entry> entry = BuildNucProtSet("rhodanese-related sulfurtransferase");
    AddSource(entry, "Coxiella burnetii");
    CRef<CSeq_entry> nuc = unit_test_util::GetNucleotideSequenceFromGoodNucProtSet(entry);

    // gene feature identified by locus_tag only
    CRef<CSeq_feat> gene(new CSeq_feat());
    gene->SetData().SetGene().SetLocus_tag("CBU_0065");
    AddFeat(gene, nuc);

    // make both the coding region and the gene 5' partial
    CRef<CSeq_feat> cds = unit_test_util::GetCDSFromGoodNucProtSet(entry);
    cds->SetLocation().SetPartialStart(true, eExtreme_Biological);
    gene->SetLocation().SetPartialStart(true, eExtreme_Biological);

    AddTitle(nuc, "Coxiella burnetii rhodanese-related sulfurtransferase (CBU_0065) gene, partial cds.");
    CheckDeflineMatches(entry);
}
1676
// GB-5272: two "hypothetical protein" coding regions, each with its own
// locus_tag-only gene. First checked with genes visible, then with gene
// features suppressed.
BOOST_AUTO_TEST_CASE(GB_5272a)
{
    CRef<CSeq_entry> entry = BuildNucProtSet("hypothetical protein");
    AddSource(entry, "Coxiella burnetii");
    CRef<CSeq_entry> nuc = unit_test_util::GetNucleotideSequenceFromGoodNucProtSet(entry);
    CRef<CSeq_feat> cds = unit_test_util::GetCDSFromGoodNucProtSet(entry);

    // gene for the first coding region, sharing its location
    CRef<CSeq_feat> gene(new CSeq_feat());
    gene->SetData().SetGene().SetLocus_tag("CBU_0067");
    AddFeat(gene, nuc);
    gene->SetLocation().Assign(cds->GetLocation());

    // second coding region (starting at 5) with its own protein sequence
    CRef<CSeq_feat> cds2 = unit_test_util::MakeCDSForGoodNucProtSet("nuc", "prot2");
    cds2->SetLocation().SetInt().SetFrom(5);
    unit_test_util::AddFeat(cds2, entry);
    CRef<CSeq_entry> pentry = unit_test_util::MakeProteinForGoodNucProtSet("prot2");
    entry->SetSet().SetSeq_set().push_back(pentry);
    // rename the first protein feature on the new protein sequence
    CRef<CSeq_feat> prot2_feat = pentry->SetSeq().SetAnnot().front()->SetData().SetFtable().front();
    prot2_feat->SetData().SetProt().SetName().front() = "hypothetical protein";

    // gene for the second coding region, sharing its location
    CRef<CSeq_feat> gene2(new CSeq_feat());
    gene2->SetData().SetGene().SetLocus_tag("CBU_0068");
    AddFeat(gene2, nuc);
    gene2->SetLocation().Assign(cds2->GetLocation());

    AddTitle(nuc, "Coxiella burnetii hypothetical protein (CBU_0067) and hypothetical protein (CBU_0068) genes, complete cds.");
    CheckDeflineMatches(entry);

    // second pass: suppress gene features in an explicitly configured autodef
    CRef<CObjectManager> object_manager = CObjectManager::GetInstance();
    CRef<CScope> scope(new CScope(*object_manager));
    CSeq_entry_Handle seh = scope->AddTopLevelSeqEntry(*entry);

    objects::CAutoDefWithTaxonomy autodef;
    autodef.AddSources(seh);

    CRef<CAutoDefModifierCombo> mod_combo(new CAutoDefModifierCombo());

    autodef.SuppressFeature(CSeqFeatData::eSubtype_gene);

    AddTitle(nuc, "Coxiella burnetii hypothetical protein genes, complete cds.");
    CheckDeflineMatches(seh, autodef, mod_combo);
}
1721
1722
// GB-5272: two "hypothetical protein" coding regions without genes, first
// on their own and then with a differently named coding region added.
BOOST_AUTO_TEST_CASE(GB_5272b)
{
    CRef<CSeq_entry> entry = BuildNucProtSet("hypothetical protein");
    AddSource(entry, "Coxiella burnetii");
    CRef<CSeq_entry> nuc = unit_test_util::GetNucleotideSequenceFromGoodNucProtSet(entry);

    // additional hypothetical-protein coding region starting at 5
    CRef<CSeq_feat> cds3 = unit_test_util::MakeCDSForGoodNucProtSet("nuc", "prot3");
    cds3->SetLocation().SetInt().SetFrom(5);
    unit_test_util::AddFeat(cds3, entry);
    CRef<CSeq_entry> pentry = unit_test_util::MakeProteinForGoodNucProtSet("prot3");
    entry->SetSet().SetSeq_set().push_back(pentry);
    CRef<CSeq_feat> prot3_feat = pentry->SetSeq().SetAnnot().front()->SetData().SetFtable().front();
    prot3_feat->SetData().SetProt().SetName().front() = "hypothetical protein";

    AddTitle(nuc, "Coxiella burnetii hypothetical protein genes, complete cds.");
    CheckDeflineMatches(entry);

    // add a coding region starting at 3 whose product is "fake protein";
    // the expected title then lists all three products separately
    CRef<CSeq_feat> cds2 = unit_test_util::MakeCDSForGoodNucProtSet("nuc", "prot2");
    cds2->SetLocation().SetInt().SetFrom(3);
    unit_test_util::AddFeat(cds2, entry);
    CRef<CSeq_entry> pentry2 = unit_test_util::MakeProteinForGoodNucProtSet("prot2");
    entry->SetSet().SetSeq_set().push_back(pentry2);
    CRef<CSeq_feat> prot2_feat = pentry2->SetSeq().SetAnnot().front()->SetData().SetFtable().front();
    prot2_feat->SetData().SetProt().SetName().front() = "fake protein";

    AddTitle(nuc, "Coxiella burnetii hypothetical protein, fake protein, and hypothetical protein genes, complete cds.");
    CheckDeflineMatches(entry);
}
1751
1752
// SQD-3462: a partial coding region with a numbered exon and a numbered
// intron inside a gene; with KeepExons and KeepIntrons enabled both are
// expected to be listed in the title.
BOOST_AUTO_TEST_CASE(SQD_3462)
{
    CRef<CSeq_entry> entry = BuildNucProtSet("brahma protein");
    AddSource(entry, "Anas castanea");
    unit_test_util::SetOrgMod(entry, COrgMod::eSubtype_isolate, "DPIWECT127");

    // coding region ends at 8 and is partial at both ends
    CRef<CSeq_feat> cds = unit_test_util::GetCDSFromGoodNucProtSet(entry);
    auto& cds_loc = cds->SetLocation();
    cds_loc.SetInt().SetTo(8);
    cds_loc.SetPartialStart(true, eExtreme_Biological);
    cds_loc.SetPartialStop(true, eExtreme_Biological);

    CRef<CSeq_entry> nuc = unit_test_util::GetNucleotideSequenceFromGoodNucProtSet(entry);

    // exon 15 over [0..8]
    CRef<CSeq_feat> exon = unit_test_util::AddMiscFeature(nuc);
    exon->ResetComment();
    exon->SetData().SetImp().SetKey("exon");
    auto& exon_int = exon->SetLocation().SetInt();
    exon_int.SetFrom(0);
    exon_int.SetTo(8);
    exon->SetQual().push_back(CRef<CGb_qual>(new CGb_qual("number", "15")));

    // intron 15 from 9 to the last base of the nucleotide sequence
    CRef<CSeq_feat> intron = unit_test_util::AddMiscFeature(nuc);
    intron->ResetComment();
    intron->SetData().SetImp().SetKey("intron");
    auto& intron_int = intron->SetLocation().SetInt();
    intron_int.SetFrom(9);
    intron_int.SetTo(nuc->GetSeq().GetLength() - 1);
    intron->SetQual().push_back(CRef<CGb_qual>(new CGb_qual("number", "15")));

    // gene BRM extended to the last base of the nucleotide sequence
    CRef<CSeq_feat> gene = unit_test_util::AddMiscFeature(nuc);
    gene->ResetComment();
    gene->SetData().SetGene().SetLocus("BRM");
    gene->SetLocation().SetInt().SetTo(nuc->GetSeq().GetLength() - 1);

    objects::CAutoDefWithTaxonomy autodef;

    CRef<CObjectManager> object_manager = CObjectManager::GetInstance();
    CRef<CScope> scope(new CScope(*object_manager));
    CSeq_entry_Handle seh = scope->AddTopLevelSeqEntry(*entry);
    autodef.AddSources(seh);
    autodef.SetKeepExons(true);
    autodef.SetKeepIntrons(true);

    // include the isolate modifier, with its label, in the source description
    CRef<CAutoDefModifierCombo> mod_combo(new CAutoDefModifierCombo());
    mod_combo->AddOrgMod(COrgMod::eSubtype_isolate, true);
    mod_combo->SetUseModifierLabels(true);

    AddTitle(nuc, "Anas castanea isolate DPIWECT127 brahma protein (BRM) gene, exon 15, intron 15, and partial cds.");
    CheckDeflineMatches(seh, autodef, mod_combo);
}
1801
// Checks CAutoDefModifierCombo::IsModifierInString(modifier, text, flag)
// for whole-word matching and for the effect of the third argument on a
// match located at the end of the text.
BOOST_AUTO_TEST_CASE(Test_IsModifierInString)
{
    // in the string, but ignore because it's at the end (third argument true)
    BOOST_CHECK_EQUAL(CAutoDefModifierCombo::IsModifierInString("abc", "abc", true), false);
    // in the string, report even at end (third argument false)
    BOOST_CHECK_EQUAL(CAutoDefModifierCombo::IsModifierInString("abc", "abc", false), true);
    // ignore because not whole word: match is preceded by another letter
    BOOST_CHECK_EQUAL(CAutoDefModifierCombo::IsModifierInString("abc", "tabc", false), false);
    // ignore because not whole word: match is followed by another letter
    BOOST_CHECK_EQUAL(CAutoDefModifierCombo::IsModifierInString("abc", "abcq", false), false);
    // skip first match because not whole word, find second match
    BOOST_CHECK_EQUAL(CAutoDefModifierCombo::IsModifierInString("abc", "abcq abc", false), true);


}
1817
1818
// Spot-checks CAutoDefModifierCombo::IsUsableInDefline for two SubSource
// subtypes and two OrgMod subtypes, one expected usable and one expected
// not usable in each family.
BOOST_AUTO_TEST_CASE(Test_IsUsableInDefline)
{
    // SubSource subtypes
    BOOST_CHECK_EQUAL(CAutoDefModifierCombo::IsUsableInDefline(CSubSource::eSubtype_plasmid_name), true);
    BOOST_CHECK_EQUAL(CAutoDefModifierCombo::IsUsableInDefline(CSubSource::eSubtype_collected_by), false);
    // OrgMod subtypes
    BOOST_CHECK_EQUAL(CAutoDefModifierCombo::IsUsableInDefline(COrgMod::eSubtype_strain), true);
    BOOST_CHECK_EQUAL(CAutoDefModifierCombo::IsUsableInDefline(COrgMod::eSubtype_variety), false);
}
1826
1827
// GB-5493: an RNA feature of type "other" whose product is a trans-spliced
// leader sequence and whose comment is "mini-exon".
BOOST_AUTO_TEST_CASE(Test_GB_5493)
{
    CRef<CSeq_entry> entry = unit_test_util::BuildGoodSeq();

    // turn a misc feature into an "other" RNA with a product name
    CRef<objects::CSeq_feat> miscrna = unit_test_util::AddMiscFeature(entry);
    auto& rna = miscrna->SetData().SetRna();
    rna.SetType(CRNA_ref::eType_other);
    string remainder;  // output argument of SetRnaProductName; not inspected here
    rna.SetRnaProductName("trans-spliced leader sequence SL", remainder);
    miscrna->SetComment("mini-exon");

    AddTitle(entry, "Sebaea microphylla trans-spliced leader sequence SL gene, complete sequence.");
    CheckDeflineMatches(entry);
}
1841
1842
// The targeted locus name must be readable back from the options, appear
// in the generated user object, and be restored by InitFromUserObject.
BOOST_AUTO_TEST_CASE(Test_TargetedLocusName)
{
    CAutoDefOptions options;

    // set the name and read it back directly
    options.SetTargetedLocusName("consensus string");
    BOOST_CHECK_EQUAL(options.GetTargetedLocusName(), "consensus string");

    // the value must be present as a string field of the user object
    CRef<CUser_object> user(options.MakeUserObject());
    BOOST_CHECK_EQUAL(HasStringField(*user, "Targeted Locus Name", "consensus string"), 1);

    // overwrite the name, then reload from the user object built earlier:
    // the earlier value is expected to come back
    options.SetTargetedLocusName("other");
    BOOST_CHECK_EQUAL(options.GetTargetedLocusName(), "other");
    options.InitFromUserObject(*user);
    BOOST_CHECK_EQUAL(options.GetTargetedLocusName(), "consensus string");
}
1858
1859
// SQD-3602: a partial misc feature on a mitochondrial sequence whose
// comment lists tRNA genes, a control region and a 12S rRNA gene.
BOOST_AUTO_TEST_CASE(Test_SQD_3602)
{
    CRef<CSeq_entry> entry = unit_test_util::BuildGoodSeq();
    unit_test_util::SetGenome(entry, CBioSource::eGenome_mitochondrion);

    CRef<objects::CSeq_feat> misc = unit_test_util::AddMiscFeature(entry);
    misc->SetComment("contains tRNA-Pro gene, control region, tRNA-Phe gene, and 12S ribosomal RNA gene");
    // partial at both ends
    auto& misc_loc = misc->SetLocation();
    misc_loc.SetPartialStart(true, eExtreme_Biological);
    misc_loc.SetPartialStop(true, eExtreme_Biological);

    AddTitle(entry, "Sebaea microphylla tRNA-Pro gene, partial sequence; control region and tRNA-Phe gene, complete sequence; and 12S ribosomal RNA gene, partial sequence; mitochondrial.");
    CheckDeflineMatches(entry);
}
1873
1874
// A partial misc feature on a mitochondrial sequence whose comment lists a
// 12S rRNA gene, a tRNA-Val (trnV) gene and a 16S rRNA gene.
BOOST_AUTO_TEST_CASE(Test_SB_5494)
{
    CRef<CSeq_entry> entry = unit_test_util::BuildGoodSeq();
    unit_test_util::SetGenome(entry, CBioSource::eGenome_mitochondrion);

    CRef<objects::CSeq_feat> misc = unit_test_util::AddMiscFeature(entry);
    misc->SetComment("contains 12S ribosomal RNA gene, tRNA-Val (trnV) gene, and 16S ribosomal RNA gene");
    // partial at both ends
    auto& misc_loc = misc->SetLocation();
    misc_loc.SetPartialStart(true, eExtreme_Biological);
    misc_loc.SetPartialStop(true, eExtreme_Biological);

    AddTitle(entry, "Sebaea microphylla 12S ribosomal RNA gene, partial sequence; tRNA-Val (trnV) gene, complete sequence; and 16S ribosomal RNA gene, partial sequence; mitochondrial.");
    CheckDeflineMatches(entry);
}
1887
1888
// GB-5447: two coding regions, both producing "hypothetical protein", the
// second one built by hand together with its own protein sequence.
BOOST_AUTO_TEST_CASE(Test_GB_5447)
{
    CRef<CSeq_entry> entry = unit_test_util::BuildGoodNucProtSet();
    CRef<CSeq_entry> nuc = unit_test_util::GetNucleotideSequenceFromGoodNucProtSet(entry);
    CRef<CSeq_feat> cds1 = unit_test_util::GetCDSFromGoodNucProtSet(entry);

    // rename the product of the existing coding region
    CRef<CSeq_feat> prot1 = unit_test_util::GetProtFeatFromGoodNucProtSet(entry);
    prot1->SetData().SetProt().SetName().front() = "hypothetical protein";

    // second coding region: starts where the first one starts and runs to
    // the last base of the nucleotide sequence
    CRef<CSeq_feat> cds2 = unit_test_util::AddMiscFeature(nuc);
    cds2->SetData().SetCdregion();
    cds2->ResetComment();
    auto& cds2_int = cds2->SetLocation().SetInt();
    cds2_int.SetFrom(cds1->GetLocation().GetStart(eExtreme_Positional));
    cds2_int.SetTo(nuc->GetSeq().GetInst().GetLength() - 1);

    // hand-built raw protein sequence of 8 residues
    CRef<CSeq_entry> pentry(new CSeq_entry());
    auto& prot_inst = pentry->SetSeq().SetInst();
    prot_inst.SetMol(objects::CSeq_inst::eMol_aa);
    prot_inst.SetRepr(objects::CSeq_inst::eRepr_raw);
    prot_inst.SetSeq_data().SetIupacaa().Set("MPRKTEIN");
    prot_inst.SetLength(8);

    // give it the local id "prot2", add it to the set and make it the
    // product of the second coding region
    CRef<objects::CSeq_id> pid(new objects::CSeq_id());
    pid->SetLocal().SetStr("prot2");
    pentry->SetSeq().SetId().push_back(pid);
    entry->SetSet().SetSeq_set().push_back(pentry);
    cds2->SetProduct().SetWhole().SetLocal().SetStr("prot2");

    CRef<CSeq_feat> prot2 = unit_test_util::AddProtFeat(pentry);
    prot2->SetData().SetProt().SetName().front() = "hypothetical protein";

    AddTitle(nuc, "Sebaea microphylla hypothetical protein genes, complete cds.");
    CheckDeflineMatches(entry, true);
}
1920
1921
MakeRegulatoryFeatureTest(const string & regulatory_class,const string & defline_interval,bool use_fake_promoters,bool keep_regulatory)1922 void MakeRegulatoryFeatureTest(const string& regulatory_class, const string& defline_interval, bool use_fake_promoters, bool keep_regulatory)
1923 {
1924 objects::CAutoDefWithTaxonomy autodef;
1925 CRef<CAutoDefModifierCombo> mod_combo(new CAutoDefModifierCombo());
1926
1927 CRef<CObjectManager> object_manager = CObjectManager::GetInstance();
1928 CRef<CScope> scope(new CScope(*object_manager));
1929
1930 CRef<CSeq_entry> entry = unit_test_util::BuildGoodNucProtSet();
1931 CRef<CSeq_entry> nuc = unit_test_util::GetNucleotideSequenceFromGoodNucProtSet(entry);
1932 if (!NStr::IsBlank(regulatory_class)) {
1933 CRef<objects::CSeq_feat> feat = unit_test_util::AddMiscFeature(entry);
1934 feat->SetData().SetImp().SetKey("regulatory");
1935 CRef<CGb_qual> q(new CGb_qual("regulatory_class", regulatory_class));
1936 feat->SetQual().push_back(q);
1937 }
1938 AddTitle(nuc, "Sebaea microphylla fake protein name gene, " + defline_interval);
1939 CSeq_entry_Handle seh = scope->AddTopLevelSeqEntry(*entry);
1940 autodef.AddSources(seh);
1941 if (use_fake_promoters) {
1942 autodef.SetUseFakePromoters(true);
1943 }
1944 if (keep_regulatory) {
1945 autodef.SetKeepRegulatoryFeatures(true);
1946 }
1947
1948 CheckDeflineMatches(seh, autodef, mod_combo);
1949 scope->RemoveTopLevelSeqEntry(seh);
1950
1951 }
1952
1953
// GB-5537: runs MakeRegulatoryFeatureTest over combinations of regulatory
// class, the fake-promoter flag and the keep-regulatory flag.
BOOST_AUTO_TEST_CASE(GB_5537)
{
    struct SRegulatoryCase {
        string regulatory_class;    // blank means "no regulatory feature"
        string defline_interval;    // expected tail of the title
        bool   use_fake_promoters;
        bool   keep_regulatory;
    };

    const SRegulatoryCase cases[] = {
        // a sequence with no promoter, but we set the FakePromoter flag
        { kEmptyStr,  "promoter region and complete cds.", true,  false },
        // a sequence with a promoter, but no flags
        { "promoter", "complete cds.",                     false, false },
        // a sequence with a promoter, set the FakePromoter flag
        { "promoter", "promoter region and complete cds.", true,  false },
        // a sequence with a promoter, set keep regulatory
        { "promoter", "promoter region and complete cds.", false, true  },
        // a sequence with a promoter, set keep regulatory and FakePromoter
        { "promoter", "promoter region and complete cds.", true,  true  },
        // a sequence with an enhancer, but no flags
        { "enhancer", "complete cds.",                     false, false },
        // a sequence with an enhancer, set fake promoters flag
        { "enhancer", "promoter region and complete cds.", true,  false },
        // a sequence with an enhancer, set keep regulatory
        { "enhancer", "enhancer and complete cds.",        false, true  },
    };

    // run the cases in the order listed above
    for (const SRegulatoryCase& c : cases) {
        MakeRegulatoryFeatureTest(c.regulatory_class, c.defline_interval,
                                  c.use_fake_promoters, c.keep_regulatory);
    }
}
1981
1982
// A nuclear-copy flag set on the options must appear in the generated user
// object as the string field "NuclearCopyFlag" with value "mitochondrion".
BOOST_AUTO_TEST_CASE(Test_AutodefOptionsSpecifyNuclearCopyFlag)
{
    CAutoDefOptions opts;
    opts.SetNuclearCopyFlag(CBioSource::eGenome_mitochondrion);

    CRef<CUser_object> user(opts.MakeUserObject());
    BOOST_CHECK_EQUAL(HasStringField(*user, "NuclearCopyFlag", "mitochondrion"), 1);
}
1992
1993
// GB-5560: a repeat_region with rpt_type=long_terminal_repeat is expected
// to be described as an "LTR repeat region".
BOOST_AUTO_TEST_CASE(Test_GB_5560)
{
    CRef<CSeq_entry> entry = unit_test_util::BuildGoodSeq();

    CRef<objects::CSeq_feat> misc = unit_test_util::AddMiscFeature(entry);
    misc->ResetComment();
    misc->SetData().SetImp().SetKey("repeat_region");
    misc->SetQual().push_back(CRef<CGb_qual>(new CGb_qual("rpt_type", "long_terminal_repeat")));

    AddTitle(entry, "Sebaea microphylla LTR repeat region.");
    CheckDeflineMatches(entry);
}
2006
2007
// GB-5758: a SubSource note containing "minicircle b" among other
// semicolon-separated pieces, checked with default settings and with the
// note subtype explicitly requested.
BOOST_AUTO_TEST_CASE(Test_GB_5758)
{
    CRef<CSeq_entry> entry = unit_test_util::BuildGoodSeq();
    unit_test_util::SetSubSource(entry, CSubSource::eSubtype_other, "a; minicircle b; c");

    // default settings: only the minicircle piece is expected
    AddTitle(entry, "Sebaea microphylla minicircle b sequence.");
    CheckDeflineMatches(entry);

    // with the "other" SubSource subtype requested and no OrgMods
    AddTitle(entry, "Sebaea microphylla a minicircle b sequence.");
    vector<CSubSource::ESubtype> subsrcs(1, CSubSource::eSubtype_other);
    vector<COrgMod::ESubtype> orgmods;
    CheckDeflineMatches(entry, subsrcs, orgmods);
}
2022
2023
TestForRecomb(CRef<CSeq_entry> entry,const string & expected)2024 void TestForRecomb(CRef<CSeq_entry> entry, const string& expected)
2025 {
2026 AddTitle(entry, expected);
2027 objects::CAutoDefWithTaxonomy autodef;
2028 CRef<CObjectManager> object_manager = CObjectManager::GetInstance();
2029 CRef<CScope> scope(new CScope(*object_manager));
2030 CSeq_entry_Handle seh = scope->AddTopLevelSeqEntry(*entry);
2031 autodef.AddSources(seh);
2032 CRef<CAutoDefModifierCombo> mod_combo(new CAutoDefModifierCombo());
2033 autodef.SetFeatureListType(CAutoDefOptions::eListAllFeatures);
2034 autodef.SetKeepMiscRecomb(true);
2035 CheckDeflineMatches(seh, autodef, mod_combo);
2036 }
2037
2038
// GB-5793: misc_recomb features are left out by default, shown when the
// keep option is used, and described by their recombination_class
// qualifier when one is present.
BOOST_AUTO_TEST_CASE(Test_GB_5793)
{
    CRef<CSeq_entry> entry = unit_test_util::BuildGoodSeq();
    CRef<CSeq_feat> m = unit_test_util::AddMiscFeature(entry);
    m->SetData().SetImp().SetKey("misc_recomb");
    m->SetComment("GCC2-ALK translocation breakpoint junction; microhomology");

    // by default, misc_recomb not included
    AddTitle(entry, "Sebaea microphylla sequence.");
    CheckDeflineMatches(entry);

    // use option to show misc_recomb
    TestForRecomb(entry, "Sebaea microphylla GCC2-ALK translocation breakpoint junction genomic sequence.");

    // prefer recombination_class qualifier
    CRef<CGb_qual> recomb_class(new CGb_qual("recombination_class", "mitotic_recombination"));
    m->SetQual().push_back(recomb_class);
    TestForRecomb(entry, "Sebaea microphylla mitotic_recombination genomic sequence.");
}
2057
2058
// GB-5765: a custom feature clause set on the autodef is expected to be
// used as the feature description in the title.
BOOST_AUTO_TEST_CASE(Test_GB_5765)
{
    CRef<CSeq_entry> entry = unit_test_util::BuildGoodSeq();
    // the feature is only needed on the entry; it is not modified afterwards
    unit_test_util::AddMiscFeature(entry);
    AddTitle(entry, "Sebaea microphylla special flower.");

    objects::CAutoDefWithTaxonomy autodef;

    CRef<CObjectManager> object_manager = CObjectManager::GetInstance();
    CRef<CScope> scope(new CScope(*object_manager));
    CSeq_entry_Handle seh = scope->AddTopLevelSeqEntry(*entry);
    autodef.AddSources(seh);

    CRef<CAutoDefModifierCombo> mod_combo(new CAutoDefModifierCombo());

    autodef.SetFeatureListType(CAutoDefOptions::eListAllFeatures);
    autodef.SetCustomFeatureClause("special flower");

    CheckDeflineMatches(seh, autodef, mod_combo);
}
2074
2075
// SQD-3914: a miscRNA whose comment lists an intergenic spacer and two
// tRNAs written without a space before the parenthesised gene name; the
// expected title has the space inserted ("tRNA-Ile (trnI)").
BOOST_AUTO_TEST_CASE(Test_SQD_3914)
{
    CRef<CSeq_entry> entry = unit_test_util::BuildGoodSeq();

    CRef<CSeq_feat> m = unit_test_util::AddMiscFeature(entry);
    m->SetData().SetRna().SetType(CRNA_ref::eType_miscRNA);
    m->SetComment("contains 16S-23S ribosomal RNA intergenic spacer, tRNA-Ile(trnI), and tRNA-Ala(trnA)");

    AddTitle(entry, "Sebaea microphylla 16S-23S ribosomal RNA intergenic spacer, tRNA-Ile (trnI) and tRNA-Ala (trnA) genes, complete sequence.");
    CheckDeflineMatches(entry);
}
2085
2086
// Pins the label returned by CAutoDefAvailableModifier::GetOrgModLabel for
// a list of OrgMod subtypes. Of the subtypes checked here, only
// culture_collection has a label that differs from the subtype's enum name
// ("culture"); every other label equals the name after "eSubtype_".
BOOST_AUTO_TEST_CASE(Test_CAutoDefAvailableModifier_GetOrgModLabel)
{
    // label differs from the enum name
    BOOST_CHECK_EQUAL(CAutoDefAvailableModifier::GetOrgModLabel(COrgMod::eSubtype_culture_collection), "culture");
    // labels identical to the enum name
    BOOST_CHECK_EQUAL(CAutoDefAvailableModifier::GetOrgModLabel(COrgMod::eSubtype_acronym), "acronym");
    BOOST_CHECK_EQUAL(CAutoDefAvailableModifier::GetOrgModLabel(COrgMod::eSubtype_strain), "strain");
    BOOST_CHECK_EQUAL(CAutoDefAvailableModifier::GetOrgModLabel(COrgMod::eSubtype_substrain), "substrain");
    BOOST_CHECK_EQUAL(CAutoDefAvailableModifier::GetOrgModLabel(COrgMod::eSubtype_type), "type");
    BOOST_CHECK_EQUAL(CAutoDefAvailableModifier::GetOrgModLabel(COrgMod::eSubtype_subtype), "subtype");
    BOOST_CHECK_EQUAL(CAutoDefAvailableModifier::GetOrgModLabel(COrgMod::eSubtype_variety), "variety");
    BOOST_CHECK_EQUAL(CAutoDefAvailableModifier::GetOrgModLabel(COrgMod::eSubtype_serotype), "serotype");
    BOOST_CHECK_EQUAL(CAutoDefAvailableModifier::GetOrgModLabel(COrgMod::eSubtype_serogroup), "serogroup");
    BOOST_CHECK_EQUAL(CAutoDefAvailableModifier::GetOrgModLabel(COrgMod::eSubtype_serovar), "serovar");
    BOOST_CHECK_EQUAL(CAutoDefAvailableModifier::GetOrgModLabel(COrgMod::eSubtype_cultivar), "cultivar");
    BOOST_CHECK_EQUAL(CAutoDefAvailableModifier::GetOrgModLabel(COrgMod::eSubtype_pathovar), "pathovar");
    BOOST_CHECK_EQUAL(CAutoDefAvailableModifier::GetOrgModLabel(COrgMod::eSubtype_chemovar), "chemovar");
    BOOST_CHECK_EQUAL(CAutoDefAvailableModifier::GetOrgModLabel(COrgMod::eSubtype_biovar), "biovar");
    BOOST_CHECK_EQUAL(CAutoDefAvailableModifier::GetOrgModLabel(COrgMod::eSubtype_biotype), "biotype");
    BOOST_CHECK_EQUAL(CAutoDefAvailableModifier::GetOrgModLabel(COrgMod::eSubtype_group), "group");
    BOOST_CHECK_EQUAL(CAutoDefAvailableModifier::GetOrgModLabel(COrgMod::eSubtype_subgroup), "subgroup");
    BOOST_CHECK_EQUAL(CAutoDefAvailableModifier::GetOrgModLabel(COrgMod::eSubtype_isolate), "isolate");
    BOOST_CHECK_EQUAL(CAutoDefAvailableModifier::GetOrgModLabel(COrgMod::eSubtype_authority), "authority");
    BOOST_CHECK_EQUAL(CAutoDefAvailableModifier::GetOrgModLabel(COrgMod::eSubtype_forma), "forma");
    BOOST_CHECK_EQUAL(CAutoDefAvailableModifier::GetOrgModLabel(COrgMod::eSubtype_ecotype), "ecotype");
    BOOST_CHECK_EQUAL(CAutoDefAvailableModifier::GetOrgModLabel(COrgMod::eSubtype_synonym), "synonym");
    BOOST_CHECK_EQUAL(CAutoDefAvailableModifier::GetOrgModLabel(COrgMod::eSubtype_anamorph), "anamorph");
    BOOST_CHECK_EQUAL(CAutoDefAvailableModifier::GetOrgModLabel(COrgMod::eSubtype_teleomorph), "teleomorph");
    BOOST_CHECK_EQUAL(CAutoDefAvailableModifier::GetOrgModLabel(COrgMod::eSubtype_breed), "breed");
}
2115
2116
// GB-5618: two misc features, each with its own gene, checked with the
// Keep3UTRs and Keep5UTRs options enabled.
BOOST_AUTO_TEST_CASE(Test_GB_5618)
{
    CRef<CSeq_entry> entry = unit_test_util::BuildGoodSeq();

    // first feature at [0..10] with a gene built from it
    // NOTE(review): the feature key is not changed to a UTR key anywhere in
    // this test, despite the variable names - confirm that is intended.
    CRef<CSeq_feat> utr3 = unit_test_util::AddMiscFeature(entry);
    utr3->SetLocation().SetInt().SetFrom(0);
    utr3->SetLocation().SetInt().SetTo(10);
    CRef<CSeq_feat> gene1 = unit_test_util::MakeGeneForFeature(utr3);
    unit_test_util::AddFeat(gene1, entry);

    // second feature at [20..25] with a gene built from it
    CRef<CSeq_feat> utr5 = unit_test_util::AddMiscFeature(entry);
    utr5->SetLocation().SetInt().SetFrom(20);
    utr5->SetLocation().SetInt().SetTo(25);
    CRef<CSeq_feat> gene2 = unit_test_util::MakeGeneForFeature(utr5);
    unit_test_util::AddFeat(gene2, entry);

    // this is the only title ever stored on the entry, so it is the one the
    // final check compares against
    AddTitle(entry, "Sebaea microphylla gene locus gene, complete sequence.");

    objects::CAutoDefWithTaxonomy autodef;

    CRef<CObjectManager> object_manager = CObjectManager::GetInstance();

    CRef<CScope> scope(new CScope(*object_manager));
    CSeq_entry_Handle seh = scope->AddTopLevelSeqEntry(*entry);
    autodef.AddSources(seh);
    autodef.SetKeep3UTRs(true);
    autodef.SetKeep5UTRs(true);

    CRef<CAutoDefModifierCombo> mod_combo(new CAutoDefModifierCombo());
    mod_combo->AddOrgMod(COrgMod::eSubtype_isolate, true);
    mod_combo->SetUseModifierLabels(true);

    // NOTE(review): the original code assigned a second expected string
    // ("Sebaea microphylla gene locus gene, 5' UTR and 3' UTR.") to a local
    // at this point but never passed it to AddTitle, so it had no effect on
    // the check. The dead assignment was removed; confirm which expectation
    // is actually intended before re-adding it with AddTitle.
    CheckDeflineMatches(seh, autodef, mod_combo);
}
2151
2152
// GB-6375: an exon is expected in the title only when it has a number and
// the coding region is partial.
BOOST_AUTO_TEST_CASE(Test_GB_6375)
{
    // suppress if no number
    CRef<CSeq_entry> nps = unit_test_util::BuildGoodNucProtSet();
    CRef<CSeq_entry> nuc = unit_test_util::GetNucleotideSequenceFromGoodNucProtSet(nps);
    CRef<CSeq_feat> cds = unit_test_util::GetCDSFromGoodNucProtSet(nps);
    cds->SetLocation().SetPartialStop(true, eExtreme_Biological);
    AddExon(nuc, "", cds->GetLocation().GetStart(eExtreme_Positional));
    AddTitle(nuc, "Sebaea microphylla fake protein name gene, partial cds.");
    CheckDeflineMatches(nps, true);

    // show if has number (rebuilt from scratch so the unnumbered exon is gone)
    nps = unit_test_util::BuildGoodNucProtSet();
    nuc = unit_test_util::GetNucleotideSequenceFromGoodNucProtSet(nps);
    cds = unit_test_util::GetCDSFromGoodNucProtSet(nps);
    cds->SetLocation().SetPartialStop(true, eExtreme_Biological);
    AddExon(nuc, "1", cds->GetLocation().GetStart(eExtreme_Positional));
    AddTitle(nuc, "Sebaea microphylla fake protein name gene, exon 1 and partial cds.");
    CheckDeflineMatches(nps, true);

    // suppress if coding region complete (same data, partial stop cleared)
    cds->SetLocation().SetPartialStop(false, eExtreme_Biological);
    AddTitle(nuc, "Sebaea microphylla fake protein name gene, complete cds.");
    CheckDeflineMatches(nps, true);
}
2182
2183
// GB-6557: the "nuclear gene for X product" suffix is expected for an
// apicoplast protein name but not for a name containing "macronuclear".
BOOST_AUTO_TEST_CASE(Test_GB_6557)
{
    CRef<CSeq_entry> nps = unit_test_util::BuildGoodNucProtSet();
    CRef<CSeq_entry> nuc = unit_test_util::GetNucleotideSequenceFromGoodNucProtSet(nps);
    CRef<CSeq_feat> prot = unit_test_util::GetProtFeatFromGoodNucProtSet(nps);

    // macronuclear: no suffix expected
    prot->SetData().SetProt().SetName().front() = "LIA2 macronuclear isoform";
    AddTitle(nuc, "Sebaea microphylla LIA2 macronuclear isoform gene, complete cds.");
    CheckDeflineMatches(nps, true);

    // apicoplast: suffix expected
    prot->SetData().SetProt().SetName().front() = "LIA2 apicoplast protein";
    AddTitle(nuc, "Sebaea microphylla LIA2 apicoplast protein gene, complete cds; nuclear gene for apicoplast product.");
    CheckDeflineMatches(nps, true);
}
2203
2204
// SQD-4185: an Influenza A virus taxname with a segment SubSource, a
// hemagglutinin product and an HA gene.
BOOST_AUTO_TEST_CASE(Test_SQD_4185)
{
    CRef<CSeq_entry> seq = unit_test_util::BuildGoodNucProtSet();
    unit_test_util::SetTaxname(seq, "Influenza A virus (A/USA/RVD1_H1/2011(H1N1))");
    unit_test_util::SetSubSource(seq, CSubSource::eSubtype_segment, "4");

    // expected title goes on the nucleotide sequence
    CRef<CSeq_entry> nuc = unit_test_util::GetNucleotideSequenceFromGoodNucProtSet(seq);
    AddTitle(nuc, "Influenza A virus (A/USA/RVD1_H1/2011(H1N1)) segment 4 hemagglutinin (HA) gene, complete cds.");

    unit_test_util::SetNucProtSetProductName(seq, "hemagglutinin");

    // gene with locus HA on the nucleotide sequence
    CRef<CSeq_feat> gene(new CSeq_feat());
    gene->SetData().SetGene().SetLocus("HA");
    AddFeat(gene, nuc);

    CheckDeflineMatches(seq, true);
}
2220
2221
// GB-6690: notes must not be picked as distinguishing modifiers. The set
// members differ only in a note value; the best modifier combo must use
// neither the OrgMod note nor the SubSource note. Checked once with the
// notes stored as OrgMod notes and once with them stored as SubSource notes.
BOOST_AUTO_TEST_CASE(Test_GB_6690)
{
    // do not include notes in deflines when calculating uniqueness
    CRef<CSeq_entry> entry = unit_test_util::BuildGoodEcoSet();
    vector<string> notes = { "a", "b", "c" };

    // part 1: a different OrgMod note on every member of the set
    // assumes the set has no more members than there are notes - TODO confirm
    vector<string>::iterator nit = notes.begin();
    NON_CONST_ITERATE(CBioseq_set::TSeq_set, it, entry->SetSet().SetSeq_set()) {
        AddTitle(*it, "Sebaea microphylla sequence.");
        unit_test_util::SetOrgMod(*it, COrgMod::eSubtype_other, *nit);
        ++nit;
    }
    entry->SetSet().ResetDescr();
    AddTitle(entry, "Sebaea microphylla sequence.");

    CRef<CObjectManager> object_manager = CObjectManager::GetInstance();

    CRef<CScope> scope(new CScope(*object_manager));
    CSeq_entry_Handle seh = scope->AddTopLevelSeqEntry(*entry);

    objects::CAutoDef autodef;
    autodef.AddSources(seh);

    CRef<CAutoDefModifierCombo> mod_combo = autodef.FindBestModifierCombo();
    BOOST_CHECK_EQUAL(mod_combo->HasOrgMod(COrgMod::eSubtype_other), false);
    BOOST_CHECK_EQUAL(mod_combo->HasSubSource(CSubSource::eSubtype_other), false);

    CheckDeflineMatches(entry, true);

    // detach before editing the entry again
    scope->RemoveTopLevelSeqEntry(seh);

    // part 2: clear the OrgMod note and store the value as a SubSource note.
    // The original called SetOrgMod with the CSubSource subtype, which only
    // re-added the OrgMod note and left the SubSource path untested.
    nit = notes.begin();
    NON_CONST_ITERATE(CBioseq_set::TSeq_set, it, entry->SetSet().SetSeq_set()) {
        unit_test_util::SetOrgMod(*it, COrgMod::eSubtype_other, "");
        unit_test_util::SetSubSource(*it, CSubSource::eSubtype_other, *nit);
        ++nit;
    }

    seh = scope->AddTopLevelSeqEntry(*entry);
    objects::CAutoDef autodef2;
    autodef2.AddSources(seh);
    // query the freshly populated autodef2; the original queried the stale
    // `autodef` from part 1
    mod_combo = autodef2.FindBestModifierCombo();
    BOOST_CHECK_EQUAL(mod_combo->HasOrgMod(COrgMod::eSubtype_other), false);
    BOOST_CHECK_EQUAL(mod_combo->HasSubSource(CSubSource::eSubtype_other), false);

    CheckDeflineMatches(entry, true);
}
2268
2269
MkField(const string & label,const string & val)2270 CRef<CUser_field> MkField(const string& label, const string& val)
2271 {
2272 CRef<CUser_field> f(new CUser_field());
2273 f->SetLabel().SetStr(label);
2274 f->SetData().SetStr(val);
2275 return f;
2276 }
2277
2278
// A sequence carrying a HumanSTR structured comment plus a "variation"
// feature with a dbSNP cross-reference is expected to get a microsatellite
// defline built from the comment fields and the dbSNP tag.
BOOST_AUTO_TEST_CASE(Test_HumanSTR)
{
    CRef<CSeq_entry> entry = unit_test_util::BuildGoodSeq();

    // Label/value pairs of the structured comment, in insertion order.
    static const char* const kCommentFields[][2] = {
        { "StructuredCommentPrefix", "##HumanSTR-START##" },
        { "STR locus name",          "TPOX" },
        { "Length-based allele",     "7" },
        { "Bracketed repeat",        "[AATG]7" }
    };

    CRef<CUser_object> str_comment(new CUser_object());
    str_comment->SetType().SetStr("StructuredComment");
    for (const auto& field : kCommentFields) {
        str_comment->SetData().push_back(MkField(field[0], field[1]));
    }

    // Attach a copy of the user object as a descriptor on the sequence.
    CRef<CSeqdesc> comment_desc(new CSeqdesc());
    comment_desc->SetUser().Assign(*str_comment);
    entry->SetSeq().SetDescr().Set().push_back(comment_desc);

    // Variation feature with the dbSNP identifier.
    CRef<CSeq_feat> variation = unit_test_util::AddMiscFeature(entry);
    variation->SetData().SetImp().SetKey("variation");
    CRef<CDbtag> snp_ref(new CDbtag());
    snp_ref->SetDb("dbSNP");
    snp_ref->SetTag().SetStr("rs115644759");
    variation->SetDbxref().push_back(snp_ref);

    AddTitle(entry, "Sebaea microphylla microsatellite TPOX 7 [AATG]7 rs115644759 sequence.");

    CheckDeflineMatches(entry);
}
2305
2306
// GB-7071: an intron feature that carries the comment "group A" is expected
// to yield the plain defline "Sebaea microphylla intron.".
BOOST_AUTO_TEST_CASE(Test_GB_7071)
{
    CRef<CSeq_entry> entry = unit_test_util::BuildGoodSeq();

    CRef<CSeq_feat> intron_feat = unit_test_util::AddMiscFeature(entry);
    intron_feat->SetData().SetImp().SetKey("intron");
    intron_feat->SetComment("group A");

    const string expected_title = "Sebaea microphylla intron.";
    AddTitle(entry, expected_title);

    CheckDeflineMatches(entry);
}
2321
2322
// GB-7479: a coding region plus a gene flagged with a "pseudogene" qualifier,
// both partial at the stop, are expected to be described as a partial
// pseudogene sequence.
BOOST_AUTO_TEST_CASE(Test_GB_7479)
{
    CRef<CSeq_entry> entry = unit_test_util::BuildGoodSeq();

    // Coding region: no comment, stop is partial.
    CRef<CSeq_feat> coding_region = unit_test_util::AddMiscFeature(entry);
    coding_region->SetData().SetCdregion();
    coding_region->ResetComment();
    coding_region->SetLocation().SetPartialStop(true, eExtreme_Biological);

    // Gene: described as "cullin 1", stop is partial, marked as a pseudogene.
    CRef<CSeq_feat> pseudo_gene = unit_test_util::AddMiscFeature(entry);
    pseudo_gene->SetData().SetGene().SetDesc("cullin 1");
    pseudo_gene->ResetComment();
    pseudo_gene->SetLocation().SetPartialStop(true, eExtreme_Biological);
    CRef<CGb_qual> pseudo_qual(new CGb_qual("pseudogene", "allelic"));
    pseudo_gene->SetQual().push_back(pseudo_qual);

    AddTitle(entry, "Sebaea microphylla cullin 1 pseudogene, partial sequence.");

    CheckDeflineMatches(entry);
}
2343
2344
CheckInfluenzaDefline(const string & taxname,const string & strain,const string & serotype,const string & clone,const string & segment,const string & defline)2345 void CheckInfluenzaDefline(const string& taxname, const string& strain, const string& serotype, const string& clone, const string& segment, const string& defline)
2346 {
2347 CRef<CSeq_entry> entry = unit_test_util::BuildGoodSeq();
2348 unit_test_util::SetTaxname(entry, taxname);
2349 if (!NStr::IsBlank(strain)) {
2350 unit_test_util::SetOrgMod(entry, COrgMod::eSubtype_strain, strain);
2351 }
2352 if (!NStr::IsBlank(serotype)) {
2353 unit_test_util::SetOrgMod(entry, COrgMod::eSubtype_serotype, serotype);
2354 }
2355 if (!NStr::IsBlank(clone)) {
2356 unit_test_util::SetSubSource(entry, CSubSource::eSubtype_clone, clone);
2357 }
2358 if (!NStr::IsBlank(segment)) {
2359 unit_test_util::SetSubSource(entry, CSubSource::eSubtype_segment, segment);
2360 }
2361
2362 AddTitle(entry, defline);
2363
2364 CheckDeflineMatches(entry);
2365
2366 }
2367
2368
// GB-7485: expected deflines for Influenza A/B/C records with various
// combinations of strain, serotype, clone and segment modifiers.
BOOST_AUTO_TEST_CASE(Test_GB_7485)
{
    struct SInfluenzaCase {
        const char* taxname;
        const char* strain;
        const char* serotype;
        const char* clone;
        const char* segment;
        const char* defline;
    };

    // Cases are executed in the order listed.
    static const SInfluenzaCase kCases[] = {
        { "Influenza A virus", "",  "",  "",  "",  "Influenza A virus sequence." },
        { "Influenza B virus", "",  "",  "",  "",  "Influenza B virus sequence." },
        { "Influenza A virus", "x", "",  "",  "",  "Influenza A virus (x) sequence." },
        { "Influenza B virus", "x", "",  "",  "",  "Influenza B virus (x) sequence." },
        { "Influenza A virus", "x", "y", "",  "",  "Influenza A virus (x(y)) sequence." },
        { "Influenza B virus", "x", "y", "",  "",  "Influenza B virus (x) sequence." },
        { "Influenza A virus", "",  "y", "",  "",  "Influenza A virus ((y)) sequence." },
        { "Influenza B virus", "",  "y", "",  "",  "Influenza B virus sequence." },
        { "Influenza A virus", "x", "y", "c", "",  "Influenza A virus (x(y)) clone c sequence." },
        { "Influenza B virus", "x", "y", "c", "",  "Influenza B virus (x) clone c sequence." },
        { "Influenza A virus", "x", "y", "",  "1", "Influenza A virus (x(y)) segment 1 sequence." },
        { "Influenza B virus", "x", "y", "",  "1", "Influenza B virus (x) segment 1 sequence." },
        { "Influenza A virus", "x", "y", "c", "1", "Influenza A virus (x(y)) clone c segment 1 sequence." },
        { "Influenza B virus", "x", "y", "c", "1", "Influenza B virus (x) clone c segment 1 sequence." },

        // taxname already contains the parenthesized strain text
        { "Influenza A virus (x(y))", "x", "y", "c", "1", "Influenza A virus (x(y)) clone c segment 1 sequence." },
        { "Influenza C virus (x)",    "x", "y", "c", "1", "Influenza C virus (x) clone c segment 1 sequence." }
    };

    for (const SInfluenzaCase& tc : kCases) {
        CheckInfluenzaDefline(tc.taxname, tc.strain, tc.serotype, tc.clone, tc.segment, tc.defline);
    }
}
2390
2391
// GB-7534: chloroplast nuc-prot set with a matK coding region and a trnK
// gene/intron copied from the matK gene feature. With introns kept, the
// defline is expected to list the trnK intron followed by the matK cds.
BOOST_AUTO_TEST_CASE(Test_GB_7534)
{
    CRef<CSeq_entry> entry = unit_test_util::BuildGoodNucProtSet();
    unit_test_util::SetTaxname(entry, "Amomum chryseum");
    unit_test_util::SetGenome(entry, CBioSource::eGenome_chloroplast);

    // Rename the protein product.
    CRef<CSeq_feat> prot_feat = unit_test_util::GetProtFeatFromGoodNucProtSet(entry);
    prot_feat->SetData().SetProt().SetName().front() = "maturase K";

    // matK gene made for the coding region, which points back at it by xref.
    CRef<CSeq_feat> cds = unit_test_util::GetCDSFromGoodNucProtSet(entry);
    CRef<CSeq_feat> matk_gene = unit_test_util::MakeGeneForFeature(cds);
    matk_gene->SetData().SetGene().SetLocus("matK");
    CRef<CSeq_entry> nuc = unit_test_util::GetNucleotideSequenceFromGoodNucProtSet(entry);
    AddFeat(matk_gene, nuc);
    cds->SetXref().push_back(CRef<CSeqFeatXref>(new CSeqFeatXref()));
    cds->SetXref().front()->SetData().Assign(matk_gene->GetData());

    // trnK gene: a copy of the matK gene feature with new locus/description.
    CRef<CSeq_feat> trnk_gene(new CSeq_feat());
    trnk_gene->Assign(*matk_gene);
    trnk_gene->SetData().SetGene().SetLocus("trnK");
    trnk_gene->SetData().SetGene().SetDesc("tRNA-Lys");
    AddFeat(trnk_gene, nuc);

    // Intron: a copy of the trnK gene feature turned into an "intron" import
    // feature, with an xref carrying the trnK gene data.
    CRef<CSeq_feat> trnk_intron(new CSeq_feat());
    trnk_intron->Assign(*trnk_gene);
    trnk_intron->SetData().SetImp().SetKey("intron");
    trnk_intron->SetXref().push_back(CRef<CSeqFeatXref>(new CSeqFeatXref()));
    trnk_intron->SetXref().front()->SetData().Assign(trnk_gene->GetData());
    AddFeat(trnk_intron, nuc);

    AddTitle(entry, "Amomum chryseum tRNA-Lys (trnK) gene, intron; and maturase K (matK) gene, complete cds; chloroplast.");

    CRef<CObjectManager> object_manager = CObjectManager::GetInstance();
    CRef<CScope> scope(new CScope(*object_manager));
    CSeq_entry_Handle seh = scope->AddTopLevelSeqEntry(*entry);

    // Run autodef with introns kept.
    objects::CAutoDefWithTaxonomy autodef;
    autodef.AddSources(seh);
    autodef.SetKeepIntrons(true);

    CRef<CAutoDefModifierCombo> mod_combo = autodef.FindBestModifierCombo();

    CheckDeflineMatches(seh, autodef, mod_combo);
}
2440
2441
// SQD-4451: a misc RNA named "internal transcribed spacer region" that is
// partial at both ends is expected to be reported as a partial sequence.
BOOST_AUTO_TEST_CASE(Test_SQD_4451)
{
    CRef<CSeq_entry> entry = BuildSequence();
    // The returned descriptor is not needed by this test.
    AddSource(entry, "Fusarium incarnatum");

    CRef<CSeq_feat> feat1(new CSeq_feat());
    feat1->SetData().SetRna().SetType(CRNA_ref::eType_miscRNA);
    feat1->SetData().SetRna().SetExt().SetName("internal transcribed spacer region");
    AddFeat(feat1, entry);

    // Interval 0..59, partial at both the start and the stop.
    feat1->SetLocation().SetInt().SetFrom(0);
    feat1->SetLocation().SetInt().SetTo(59);
    feat1->SetLocation().SetPartialStart(true, eExtreme_Biological);
    feat1->SetLocation().SetPartialStop(true, eExtreme_Biological);

    AddTitle(entry, "Fusarium incarnatum internal transcribed spacer region, partial sequence.");

    CheckDeflineMatches(entry);
}
2459
// SQD-4529: with the eDelete misc-feature rule, a misc feature commented
// "similar to beta-tubulin" is expected to produce a "beta-tubulin-like gene"
// defline. After an rRNA named "foo" is added, the expected defline names
// only the "foo" gene.
BOOST_AUTO_TEST_CASE(Test_SQD_4529)
{
    CRef<CSeq_entry> entry = BuildSequence();
    // The returned descriptor is not needed by this test.
    AddSource(entry, "Fusarium incarnatum");

    CRef<CSeq_feat> feat1 = unit_test_util::AddMiscFeature(entry);
    feat1->SetComment("similar to beta-tubulin");

    AddTitle(entry, "Fusarium incarnatum beta-tubulin-like gene, complete sequence.");

    CheckDeflineMatches(entry, true, CAutoDefOptions::eListAllFeatures, CAutoDefOptions::eDelete);

    // Add an rRNA feature and re-check with the updated title.
    CRef<CSeq_feat> rrna1 = unit_test_util::AddMiscFeature(entry);
    rrna1->ResetComment();
    rrna1->SetData().SetRna().SetType(CRNA_ref::eType_rRNA);
    rrna1->SetData().SetRna().SetExt().SetName("foo");
    AddTitle(entry, "Fusarium incarnatum foo gene, complete sequence.");
    CheckDeflineMatches(entry, true, CAutoDefOptions::eListAllFeatures, CAutoDefOptions::eDelete);
}
2479
2480
AddProtFeat(CRef<CSeq_entry> prot,CProt_ref::EProcessed proc)2481 void AddProtFeat(CRef<CSeq_entry> prot, CProt_ref::EProcessed proc)
2482 {
2483 CRef<CSeq_feat> p = unit_test_util::AddMiscFeature(prot);
2484 p->SetData().SetProt().SetProcessed(proc);
2485 p->SetData().SetProt().SetName().clear();
2486 p->SetData().SetProt().SetName().push_back("RdRp");
2487 p->ResetComment();
2488 }
2489
2490
TestMatPeptideListing(bool cds_is_partial,bool has_sig_peptide)2491 void TestMatPeptideListing(bool cds_is_partial, bool has_sig_peptide)
2492 {
2493 CRef<CSeq_entry> entry = unit_test_util::BuildGoodNucProtSet();
2494 CRef<CSeq_feat> cds = unit_test_util::GetCDSFromGoodNucProtSet(entry);
2495 if (cds_is_partial) {
2496 cds->SetLocation().SetPartialStop(true, eExtreme_Biological);
2497 cds->SetPartial(true);
2498 }
2499 CRef<CSeq_feat> gene = unit_test_util::MakeGeneForFeature(cds);
2500 gene->SetData().SetGene().SetLocus("ORF1");
2501 CRef<CSeq_entry> nuc = unit_test_util::GetNucleotideSequenceFromGoodNucProtSet(entry);
2502 unit_test_util::AddFeat(gene, nuc);
2503 CRef<CSeq_feat> pfeat = unit_test_util::GetProtFeatFromGoodNucProtSet(entry);
2504 pfeat->SetData().SetProt().SetName().clear();
2505 pfeat->SetData().SetProt().SetName().push_back("nonstructural polyprotein");
2506 CRef<CSeq_entry> prot = unit_test_util::GetProteinSequenceFromGoodNucProtSet(entry);
2507 AddProtFeat(prot, CProt_ref::eProcessed_mature);
2508 if (has_sig_peptide) {
2509 AddProtFeat(prot, CProt_ref::eProcessed_signal_peptide);
2510 }
2511
2512 if (cds_is_partial) {
2513 if (has_sig_peptide) {
2514 AddTitle(nuc, "Sebaea microphylla nonstructural polyprotein (ORF1) gene, partial cds.");
2515 } else {
2516 AddTitle(nuc, "Sebaea microphylla nonstructural polyprotein, RdRp region, (ORF1) gene, partial cds.");
2517 }
2518 } else {
2519 AddTitle(nuc, "Sebaea microphylla nonstructural polyprotein (ORF1) gene, complete cds.");
2520 }
2521 CheckDeflineMatches(entry, true, CAutoDefOptions::eListAllFeatures, CAutoDefOptions::eDelete);
2522
2523 }
2524
// SQD-4593: runs TestMatPeptideListing for every combination of partial /
// complete cds and absent / present signal peptide.
BOOST_AUTO_TEST_CASE(Test_SQD_4593)
{
    // Partial cases first; within each, "no signal peptide" first.
    const bool kCdsPartial[] = { true, false };
    const bool kHasSigPeptide[] = { false, true };

    for (bool cds_is_partial : kCdsPartial) {
        for (bool has_sig_peptide : kHasSigPeptide) {
            TestMatPeptideListing(cds_is_partial, has_sig_peptide);
        }
    }
}
2532
2533
// SQD-4607: with the eDelete misc-feature rule, a misc feature commented
// "contains promoter and 5' UTR" is expected to be listed as
// "promoter region and 5' UTR, genomic sequence".
BOOST_AUTO_TEST_CASE(Test_SQD_4607)
{
    CRef<CSeq_entry> entry = unit_test_util::BuildGoodSeq();

    CRef<CSeq_feat> misc_feat = unit_test_util::AddMiscFeature(entry);
    misc_feat->SetComment("contains promoter and 5' UTR");

    const string expected_title = "Sebaea microphylla promoter region and 5' UTR, genomic sequence.";
    AddTitle(entry, expected_title);

    CheckDeflineMatches(entry, true, CAutoDefOptions::eListAllFeatures, CAutoDefOptions::eDelete);
}
2545
2546
CheckRegulatoryFeatures(const string & expected_title,bool keep_promoters,bool keep_regulatory)2547 void CheckRegulatoryFeatures(const string& expected_title, bool keep_promoters, bool keep_regulatory)
2548 {
2549 CRef<CSeq_entry> entry = unit_test_util::BuildGoodSeq();
2550 CRef<CSeq_feat> promoter = unit_test_util::AddMiscFeature(entry);
2551 promoter->SetData().SetImp().SetKey("regulatory");
2552 promoter->SetQual().push_back(CRef<CGb_qual>(new CGb_qual("regulatory_class", "promoter")));
2553 promoter->ResetComment();
2554 CRef<CSeq_feat> rbs = unit_test_util::AddMiscFeature(entry);
2555 rbs->SetData().SetImp().SetKey("regulatory");
2556 rbs->SetQual().push_back(CRef<CGb_qual>(new CGb_qual("regulatory_class", "ribosome_binding_site")));
2557 rbs->ResetComment();
2558
2559 CRef<CSeq_feat> gene = unit_test_util::AddMiscFeature(entry);
2560 gene->SetData().SetGene().SetLocus("msa");
2561 gene->SetData().SetGene().SetDesc("mannose-specific adhesin");
2562 gene->ResetComment();
2563
2564 AddTitle(entry, expected_title);
2565
2566 CRef<CObjectManager> object_manager = CObjectManager::GetInstance();
2567
2568 CRef<CScope> scope(new CScope(*object_manager));
2569 CSeq_entry_Handle seh = scope->AddTopLevelSeqEntry(*entry);
2570
2571 objects::CAutoDefWithTaxonomy autodef;
2572
2573 // add to autodef
2574 autodef.AddSources(seh);
2575
2576 CRef<CAutoDefModifierCombo> mod_combo = autodef.FindBestModifierCombo();
2577
2578 autodef.SetFeatureListType(CAutoDefOptions::eListAllFeatures);
2579 autodef.SetKeepRegulatoryFeatures(keep_regulatory);
2580 autodef.SetUseFakePromoters(keep_promoters);
2581
2582 CheckDeflineMatches(seh, autodef, mod_combo);
2583
2584 }
2585
2586
// SQD-4612: expected deflines for the regulatory-feature fixture under three
// combinations of the keep_promoters / keep_regulatory switches.
BOOST_AUTO_TEST_CASE(Test_SQD_4612)
{
    struct SRegulatoryCase {
        const char* title;
        bool        keep_promoters;
        bool        keep_regulatory;
    };

    static const SRegulatoryCase kCases[] = {
        { "Sebaea microphylla mannose-specific adhesin (msa) gene, promoter region.", false, false },
        { "Sebaea microphylla mannose-specific adhesin (msa) gene, promoter region.", true,  false },
        { "Sebaea microphylla mannose-specific adhesin (msa) gene, promoter region and ribosome_binding_site.", true, true }
    };

    for (const SRegulatoryCase& tc : kCases) {
        CheckRegulatoryFeatures(tc.title, tc.keep_promoters, tc.keep_regulatory);
    }
}
2594
// GB-8547: Influenza A record with a strain and a segment; the defline is
// expected to show the strain in parentheses followed by "segment 5".
BOOST_AUTO_TEST_CASE(Test_GB_8547)
{
    CRef<CSeq_entry> flu_entry = unit_test_util::BuildGoodSeq();

    unit_test_util::SetTaxname(flu_entry, "Influenza A virus");
    unit_test_util::SetOrgMod(flu_entry, COrgMod::eSubtype_strain, "A/Florida/57/2019");
    unit_test_util::SetSubSource(flu_entry, CSubSource::eSubtype_segment, "5");

    const string expected_title = "Influenza A virus (A/Florida/57/2019) segment 5 sequence.";
    AddTitle(flu_entry, expected_title);

    CheckDeflineMatches(flu_entry);
}
2606
// GB-8604: nuc-prot set with a start-partial coding region, an mRNA and a
// gene. The defline is checked three times: before a mature-peptide feature
// exists, with the mature peptide present but suppressed, and with the
// mature peptide shown.
BOOST_AUTO_TEST_CASE(Test_GB_8604)
{
    CRef<CSeq_entry> entry = unit_test_util::BuildGoodNucProtSet();
    CRef<CSeq_entry> nuc = unit_test_util::GetNucleotideSequenceFromGoodNucProtSet(entry);

    // Coding region is partial at the start.
    CRef<CSeq_feat> coding_region = unit_test_util::GetCDSFromGoodNucProtSet(entry);
    coding_region->SetLocation().SetPartialStart(true, eExtreme_Biological);
    coding_region->SetPartial(true);

    // Protein and mRNA share the same product name.
    CRef<CSeq_feat> prot_feat = unit_test_util::GetProtFeatFromGoodNucProtSet(entry);
    prot_feat->SetData().SetProt().SetName().front() = "proannomuricatin G";
    CRef<CSeq_feat> transcript = unit_test_util::MakemRNAForCDS(coding_region);
    transcript->SetData().SetRna().SetExt().SetName("proannomuricatin G");
    unit_test_util::AddFeat(transcript, nuc);

    CRef<CSeq_feat> pam_gene = unit_test_util::MakeGeneForFeature(transcript);
    pam_gene->SetData().SetGene().SetLocus("PamG");
    unit_test_util::AddFeat(pam_gene, nuc);

    // check without mat-peptide first
    AddTitle(nuc, "Sebaea microphylla proannomuricatin G (PamG) gene, partial cds.");
    CheckDeflineMatches(entry);

    // check with mat-peptide
    CRef<CSeq_entry> prot = unit_test_util::GetProteinSequenceFromGoodNucProtSet(entry);
    CRef<CSeq_feat> mature = unit_test_util::AddMiscFeature(prot);
    mature->ResetComment();
    mature->SetData().SetProt().SetProcessed(CProt_ref::eProcessed_mature);
    mature->SetData().SetProt().SetName().push_back("annomuricatin G");

    // if suppressing mat-peptide, no change
    CheckDeflineMatches(entry, CSeqFeatData::eSubtype_mat_peptide_aa);

    // show when not suppressing
    AddTitle(entry, "Sebaea microphylla proannomuricatin G, annomuricatin G region, (PamG) gene, partial cds.");
    CheckDeflineMatches(entry);
}
2642
MakeRegulatoryFeature(const string & reg_class,const string & comment,TSeqPos start_pos,CRef<CSeq_entry> entry)2643 CRef<CSeq_feat> MakeRegulatoryFeature(const string& reg_class, const string& comment, TSeqPos start_pos, CRef<CSeq_entry> entry)
2644 {
2645 CRef<CSeq_feat> reg = unit_test_util::AddMiscFeature(entry);
2646 reg->SetData().SetImp().SetKey("regulatory");
2647 reg->SetComment(comment);
2648 reg->SetQual().push_back(CRef<CGb_qual>(new CGb_qual("regulatory_class", reg_class)));
2649 reg->SetLocation().SetInt().SetFrom(start_pos);
2650 reg->SetLocation().SetInt().SetTo(start_pos + 4);
2651 return reg;
2652 }
2653
MakeRptRegion(const string & rpt_type,TSeqPos start_pos,CRef<CSeq_entry> entry)2654 CRef<CSeq_feat> MakeRptRegion(const string& rpt_type, TSeqPos start_pos, CRef<CSeq_entry> entry)
2655 {
2656 CRef<CSeq_feat> reg = unit_test_util::AddMiscFeature(entry);
2657 reg->ResetComment();
2658 reg->SetData().SetImp().SetKey("repeat_region");
2659 reg->SetQual().push_back(CRef<CGb_qual>(new CGb_qual("rpt_type", rpt_type)));
2660 reg->SetLocation().SetInt().SetFrom(start_pos);
2661 reg->SetLocation().SetInt().SetTo(start_pos + 4);
2662 return reg;
2663 }
2664
2665
TestRepeatRegion(CRef<CSeq_entry> entry)2666 void TestRepeatRegion(CRef<CSeq_entry> entry)
2667 {
2668 CRef<CObjectManager> object_manager = CObjectManager::GetInstance();
2669
2670 CRef<CScope> scope(new CScope(*object_manager));
2671 CSeq_entry_Handle seh = scope->AddTopLevelSeqEntry(*entry);
2672
2673 objects::CAutoDefWithTaxonomy autodef;
2674
2675 // add to autodef
2676 autodef.AddSources(seh);
2677
2678 CRef<CAutoDefModifierCombo> mod_combo = autodef.FindBestModifierCombo();
2679
2680 autodef.SetFeatureListType(CAutoDefOptions::eListAllFeatures);
2681 autodef.SetKeepRepeatRegion(true);
2682
2683 CheckDeflineMatches(seh, autodef, mod_combo);
2684
2685 }
2686
// GB-8854: a long_terminal_repeat repeat region is expected to give the
// defline "Sebaea microphylla LTR repeat region." both with default options
// and with repeat regions explicitly kept. The repeat-region check is then
// repeated, against the same title, after three regulatory features are
// added.
BOOST_AUTO_TEST_CASE(Test_GB_8854)
{
    CRef<CSeq_entry> entry = unit_test_util::BuildGoodSeq();

    // The helpers attach the features to `entry`; the returned handles are
    // not needed here.
    MakeRptRegion("long_terminal_repeat", 15, entry);
    AddTitle(entry, "Sebaea microphylla LTR repeat region.");
    CheckDeflineMatches(entry);
    TestRepeatRegion(entry);

    MakeRegulatoryFeature("CAAT_signal", "U3 region", 0, entry);
    MakeRegulatoryFeature("TATA_box", "U3 region", 5, entry);
    MakeRegulatoryFeature("polyA_signal_sequence", "R-region", 10, entry);

    TestRepeatRegion(entry);
}
2702
2703
// Checks the defline ending produced for each non-feature-list option
// (complete/partial sequence, complete/partial genome, WGS) on a plain
// sequence.
BOOST_AUTO_TEST_CASE(Test_ClauseListOptions)
{
    CRef<CSeq_entry> entry = unit_test_util::BuildGoodSeq();

    // Sets the expected title, then checks it with the given list option.
    const auto expect_title = [&entry](const char* title,
                                       decltype(CAutoDefOptions::eCompleteSequence) list_type) {
        AddTitle(entry, title);
        CheckDeflineMatches(entry, true, list_type);
    };

    expect_title("Sebaea microphylla, complete sequence.", CAutoDefOptions::eCompleteSequence);
    expect_title("Sebaea microphylla, complete genome.", CAutoDefOptions::eCompleteGenome);
    expect_title("Sebaea microphylla, partial sequence.", CAutoDefOptions::ePartialSequence);
    expect_title("Sebaea microphylla, partial genome.", CAutoDefOptions::ePartialGenome);
    expect_title("Sebaea microphylla whole genome shotgun sequence.", CAutoDefOptions::eWholeGenomeShotgunSequence);
}
2723
2724
2725 END_SCOPE(objects)
2726 END_NCBI_SCOPE
2727