1 /* $Id: unit_test_util.cpp 607137 2020-04-30 12:57:57Z grichenk $
2  * ===========================================================================
3  *
4  *                            PUBLIC DOMAIN NOTICE
5  *               National Center for Biotechnology Information
6  *
7  *  This software/database is a "United States Government Work" under the
8  *  terms of the United States Copyright Act.  It was written as part of
9  *  the author's official duties as a United States Government employee and
10  *  thus cannot be copyrighted.  This software/database is freely available
11  *  to the public for use. The National Library of Medicine and the U.S.
12  *  Government have not placed any restriction on its use or reproduction.
13  *
14  *  Although all reasonable efforts have been taken to ensure the accuracy
15  *  and reliability of the software and data, the NLM and the U.S.
16  *  Government do not and cannot warrant the performance or results that
17  *  may be obtained by using this software or data. The NLM and the U.S.
18  *  Government disclaim all warranties, express or implied, including
19  *  warranties of performance, merchantability or fitness for any particular
20  *  purpose.
21  *
22  *  Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Author:  Jonathan Kans, Clifford Clausen, Aaron Ucko.......
27  *
28  * File Description:
 *   Helper functions for unit tests: build sample objects::CSeq_entry
 *   objects and modify their BioSource descriptors, dbxrefs and features
30  *
31  */
32 #include <ncbi_pch.hpp>
33 #include <corelib/ncbistd.hpp>
34 #include <objects/general/Object_id.hpp>
35 #include <objects/general/Dbtag.hpp>
36 #include <objects/seqfeat/BioSource.hpp>
37 #include <objects/seqfeat/Org_ref.hpp>
38 #include <objects/seqfeat/Imp_feat.hpp>
39 #include <objects/seqfeat/Cdregion.hpp>
40 #include <objects/seq/Seq_descr.hpp>
41 #include <objects/biblio/Id_pat.hpp>
42 #include <objects/biblio/Title.hpp>
43 #include <objects/general/Dbtag.hpp>
44 #include <objects/general/User_object.hpp>
45 #include <objects/medline/Medline_entry.hpp>
46 #include <objects/misc/sequence_macros.hpp>
47 #include <objects/pub/Pub_equiv.hpp>
48 #include <objects/pub/Pub.hpp>
49 #include <objects/seq/GIBB_mol.hpp>
50 #include <objects/seq/Seq_ext.hpp>
51 #include <objects/seq/Delta_ext.hpp>
52 #include <objects/seq/Delta_seq.hpp>
53 #include <objects/seq/Seq_literal.hpp>
54 #include <objects/seq/Ref_ext.hpp>
55 #include <objects/seq/Map_ext.hpp>
56 #include <objects/seq/Seg_ext.hpp>
57 #include <objects/seq/Seq_gap.hpp>
58 #include <objects/seq/Seq_data.hpp>
59 #include <objects/seq/Seq_descr.hpp>
60 #include <objects/seq/Seqdesc.hpp>
61 #include <objects/seq/MolInfo.hpp>
62 #include <objects/seq/Pubdesc.hpp>
63 #include <objects/seq/Seq_hist.hpp>
64 #include <objects/seq/Seq_hist_rec.hpp>
65 #include <objects/seqalign/Dense_seg.hpp>
66 #include <objects/seqblock/GB_block.hpp>
67 #include <objects/seqblock/EMBL_block.hpp>
68 #include <objects/seqfeat/BioSource.hpp>
69 #include <objects/seqfeat/Org_ref.hpp>
70 #include <objects/seqfeat/OrgName.hpp>
71 #include <objects/seqfeat/SubSource.hpp>
72 #include <objects/seqfeat/Imp_feat.hpp>
73 #include <objects/seqfeat/Cdregion.hpp>
74 #include <objects/seqloc/Seq_id.hpp>
75 #include <objects/seqloc/PDB_seq_id.hpp>
76 #include <objects/seqloc/Giimport_id.hpp>
77 #include <objects/seqloc/Patent_seq_id.hpp>
78 #include <objects/seqloc/Seq_loc.hpp>
79 #include <objects/seqloc/Seq_interval.hpp>
80 #include <objmgr/util/sequence.hpp>
81 #include <objects/misc/sequence_macros.hpp>
82 
83 #include <objtools/unit_test_util/unit_test_util.hpp>
84 
85 #include <functional>
86 
87 BEGIN_NCBI_SCOPE
BEGIN_SCOPE(objects)88 BEGIN_SCOPE(objects)
89 BEGIN_SCOPE(unit_test_util)
90 
91 
92 void SetDbxref (objects::CBioSource& src, string db, objects::CObject_id::TId id)
93 {
94     CRef<objects::CDbtag> dbtag(new objects::CDbtag());
95     dbtag->SetDb(db);
96     dbtag->SetTag().SetId(id);
97     src.SetOrg().SetDb().push_back(dbtag);
98 }
99 
100 
RemoveDbxref(objects::CBioSource & src,string db,objects::CObject_id::TId id)101 void RemoveDbxref (objects::CBioSource& src, string db, objects::CObject_id::TId id)
102 {
103     if (src.IsSetOrg() && src.GetOrg().IsSetDb()) {
104         objects::COrg_ref::TDb::iterator it = src.SetOrg().SetDb().begin();
105         while (it != src.SetOrg().SetDb().end()) {
106             if ((NStr::IsBlank(db) || ((*it)->IsSetDb() && NStr::Equal((*it)->GetDb(), db)))
107                 && (id == 0 || ((*it)->IsSetTag() && (*it)->GetTag().IsId() && (*it)->GetTag().GetId() == id))) {
108                 it = src.SetOrg().SetDb().erase(it);
109             } else {
110                 ++it;
111             }
112         }
113     }
114 }
115 
116 
SetTaxon(objects::CBioSource & src,size_t taxon)117 void SetTaxon (objects::CBioSource& src, size_t taxon)
118 {
119     if (taxon == 0) {
120         RemoveDbxref (src, "taxon", 0);
121     } else {
122         SetDbxref(src, "taxon", taxon);
123     }
124 }
125 
126 
BuildGoodSeq(void)127 CRef<objects::CSeq_entry> BuildGoodSeq(void)
128 {
129     CRef<objects::CSeq_entry> entry(new objects::CSeq_entry());
130     entry->SetSeq().SetInst().SetMol(objects::CSeq_inst::eMol_dna);
131     entry->SetSeq().SetInst().SetRepr(objects::CSeq_inst::eRepr_raw);
132     entry->SetSeq().SetInst().SetSeq_data().SetIupacna().Set("AATTGGCCAAAATTGGCCAAAATTGGCCAAAATTGGCCAAAATTGGCCAAAATTGGCCAA");
133     entry->SetSeq().SetInst().SetLength(60);
134 
135     CRef<objects::CSeq_id> id(new objects::CSeq_id());
136     id->SetLocal().SetStr ("good");
137     entry->SetSeq().SetId().push_back(id);
138 
139     CRef<objects::CSeqdesc> mdesc(new objects::CSeqdesc());
140     mdesc->SetMolinfo().SetBiomol(objects::CMolInfo::eBiomol_genomic);
141     entry->SetSeq().SetDescr().Set().push_back(mdesc);
142 
143     AddGoodSource (entry);
144     AddGoodPub(entry);
145 
146     return entry;
147 }
148 
149 
BuildGoodPubSeqdesc()150 CRef<objects::CSeqdesc> BuildGoodPubSeqdesc()
151 {
152     CRef<objects::CSeqdesc> pdesc(new objects::CSeqdesc());
153     CRef<objects::CPub> pub(new objects::CPub());
154     pub->SetPmid(CPub::TPmid(ENTREZ_ID_CONST(1)));
155     pdesc->SetPub().SetPub().Set().push_back(pub);
156 
157     return pdesc;
158 }
159 
160 
AddGoodPub(CRef<objects::CSeq_entry> entry)161 void AddGoodPub (CRef<objects::CSeq_entry> entry)
162 {
163     CRef<objects::CSeqdesc> pdesc = BuildGoodPubSeqdesc();
164 
165     if (entry->IsSeq()) {
166         entry->SetSeq().SetDescr().Set().push_back(pdesc);
167     } else if (entry->IsSet()) {
168         entry->SetSet().SetDescr().Set().push_back(pdesc);
169     }
170 
171     CRef<objects::CSeqdesc> pdesc2 = BuildGoodPubSeqdesc();
172     pdesc2->SetPub().SetPub().Set().front()->Assign(*BuildGoodCitSubPub());
173     if (entry->IsSeq()) {
174         entry->SetSeq().SetDescr().Set().push_back(pdesc2);
175     } else if (entry->IsSet()) {
176         entry->SetSet().SetDescr().Set().push_back(pdesc2);
177     }
178 
179 }
180 
181 
AddGoodSource(CRef<objects::CSeq_entry> entry)182 void AddGoodSource (CRef<objects::CSeq_entry> entry)
183 {
184     CRef<objects::CSeqdesc> odesc(new objects::CSeqdesc());
185     odesc->SetSource().SetOrg().SetTaxname("Sebaea microphylla");
186     odesc->SetSource().SetOrg().SetOrgname().SetLineage("some lineage");
187     SetTaxon(odesc->SetSource(), 592768);
188     CRef<objects::CSubSource> subsrc(new objects::CSubSource());
189     subsrc->SetSubtype(objects::CSubSource::eSubtype_chromosome);
190     subsrc->SetName("1");
191     odesc->SetSource().SetSubtype().push_back(subsrc);
192 
193     if (entry->IsSeq()) {
194         entry->SetSeq().SetDescr().Set().push_back(odesc);
195     } else if (entry->IsSet()) {
196         entry->SetSet().SetDescr().Set().push_back(odesc);
197     }
198 }
199 
200 
SetDbxref(objects::CBioSource & src,string db,string id)201 void SetDbxref (objects::CBioSource& src, string db, string id)
202 {
203     CRef<objects::CDbtag> dbtag(new objects::CDbtag());
204     dbtag->SetDb(db);
205     dbtag->SetTag().SetStr(id);
206     src.SetOrg().SetDb().push_back(dbtag);
207 }
208 
209 
SetDbxref(CRef<objects::CSeq_entry> entry,string db,objects::CObject_id::TId id)210 void SetDbxref (CRef<objects::CSeq_entry> entry, string db, objects::CObject_id::TId id)
211 {
212     if (!entry) {
213         return;
214     }
215     if (entry->IsSeq()) {
216         NON_CONST_ITERATE (objects::CSeq_descr::Tdata, it, entry->SetSeq().SetDescr().Set()) {
217             if ((*it)->IsSource()) {
218                 SetDbxref((*it)->SetSource(), db, id);
219             }
220         }
221     } else if (entry->IsSet()) {
222         NON_CONST_ITERATE (objects::CSeq_descr::Tdata, it, entry->SetSet().SetDescr().Set()) {
223             if ((*it)->IsSource()) {
224                 SetDbxref((*it)->SetSource(), db, id);
225             }
226         }
227     }
228 }
229 
230 
SetDbxref(CRef<objects::CSeq_entry> entry,string db,string id)231 void SetDbxref (CRef<objects::CSeq_entry> entry, string db, string id)
232 {
233     if (!entry) {
234         return;
235     }
236     if (entry->IsSeq()) {
237         NON_CONST_ITERATE (objects::CSeq_descr::Tdata, it, entry->SetSeq().SetDescr().Set()) {
238             if ((*it)->IsSource()) {
239                 SetDbxref((*it)->SetSource(), db, id);
240             }
241         }
242     } else if (entry->IsSet()) {
243         NON_CONST_ITERATE (objects::CSeq_descr::Tdata, it, entry->SetSet().SetDescr().Set()) {
244             if ((*it)->IsSource()) {
245                 SetDbxref((*it)->SetSource(), db, id);
246             }
247         }
248     }
249 }
250 
251 
SetDbxref(CRef<objects::CSeq_feat> feat,string db,objects::CObject_id::TId id)252 void SetDbxref (CRef<objects::CSeq_feat> feat, string db, objects::CObject_id::TId id)
253 {
254     if (!feat) {
255         return;
256     }
257     CRef<objects::CDbtag> dbtag(new objects::CDbtag());
258     dbtag->SetDb(db);
259     dbtag->SetTag().SetId(id);
260     feat->SetDbxref().push_back(dbtag);
261 }
262 
263 
SetDbxref(CRef<objects::CSeq_feat> feat,string db,string id)264 void SetDbxref (CRef<objects::CSeq_feat> feat, string db, string id)
265 {
266     if (!feat) {
267         return;
268     }
269     CRef<objects::CDbtag> dbtag(new objects::CDbtag());
270     dbtag->SetDb(db);
271     dbtag->SetTag().SetStr(id);
272     feat->SetDbxref().push_back(dbtag);
273 }
274 
275 
276 
RemoveDbxref(CRef<objects::CSeq_entry> entry,string db,objects::CObject_id::TId id)277 void RemoveDbxref (CRef<objects::CSeq_entry> entry, string db, objects::CObject_id::TId id)
278 {
279     if (!entry) {
280         return;
281     }
282     if (entry->IsSeq()) {
283         NON_CONST_ITERATE (objects::CSeq_descr::Tdata, it, entry->SetSeq().SetDescr().Set()) {
284             if ((*it)->IsSource()) {
285                 RemoveDbxref((*it)->SetSource(), db, id);
286             }
287         }
288     } else if (entry->IsSet()) {
289         NON_CONST_ITERATE (objects::CSeq_descr::Tdata, it, entry->SetSet().SetDescr().Set()) {
290             if ((*it)->IsSource()) {
291                 RemoveDbxref((*it)->SetSource(), db, id);
292             }
293         }
294     }
295 }
296 
297 
RemoveDbxref(CRef<objects::CSeq_feat> feat,string db,objects::CObject_id::TId id)298 void RemoveDbxref (CRef<objects::CSeq_feat> feat, string db, objects::CObject_id::TId id)
299 {
300     if (!feat) {
301         return;
302     }
303     if (feat->IsSetDbxref()) {
304         objects::CSeq_feat::TDbxref::iterator it = feat->SetDbxref().begin();
305         while (it != feat->SetDbxref().end()) {
306             if ((NStr::IsBlank(db) || ((*it)->IsSetDb() && NStr::Equal((*it)->GetDb(), db)))
307                 && (id == 0 || ((*it)->IsSetTag() && (*it)->GetTag().IsId() && (*it)->GetTag().GetId() == id))) {
308                 it = feat->SetDbxref().erase(it);
309             } else {
310                 ++it;
311             }
312         }
313     }
314 }
315 
316 
SetTaxon(CRef<objects::CSeq_entry> entry,size_t taxon)317 void SetTaxon (CRef<objects::CSeq_entry> entry, size_t taxon)
318 {
319     if (!entry) {
320         return;
321     }
322     if (entry->IsSeq()) {
323         NON_CONST_ITERATE (objects::CSeq_descr::Tdata, it, entry->SetSeq().SetDescr().Set()) {
324             if ((*it)->IsSource()) {
325                 SetTaxon((*it)->SetSource(), taxon);
326             }
327         }
328     } else if (entry->IsSet()) {
329         NON_CONST_ITERATE (objects::CSeq_descr::Tdata, it, entry->SetSet().SetDescr().Set()) {
330             if ((*it)->IsSource()) {
331                 SetTaxon((*it)->SetSource(), taxon);
332             }
333         }
334     }
335 }
336 
337 
AddFeatAnnotToSeqEntry(CRef<objects::CSeq_annot> annot,CRef<objects::CSeq_entry> entry)338 void AddFeatAnnotToSeqEntry (CRef<objects::CSeq_annot> annot, CRef<objects::CSeq_entry> entry)
339 {
340     if (!entry || !annot) {
341         return;
342     }
343     if (entry->IsSeq()) {
344         entry->SetSeq().SetAnnot().push_back(annot);
345     } else if (entry->IsSet()) {
346         if (entry->GetSet().IsSetSeq_set()) {
347             AddFeatAnnotToSeqEntry (annot, entry->SetSet().SetSeq_set().front());
348         }
349     }
350 }
351 
352 
AddFeat(CRef<objects::CSeq_feat> feat,CRef<objects::CSeq_entry> entry)353 CRef<objects::CSeq_annot> AddFeat (CRef<objects::CSeq_feat> feat, CRef<objects::CSeq_entry> entry)
354 {
355     CRef<objects::CSeq_annot> annot;
356 
357     if (entry->IsSeq()) {
358         if (!entry->GetSeq().IsSetAnnot()
359             || !entry->GetSeq().GetAnnot().front()->IsFtable()) {
360             CRef<objects::CSeq_annot> new_annot(new objects::CSeq_annot());
361             entry->SetSeq().SetAnnot().push_back(new_annot);
362             annot = new_annot;
363         } else {
364             annot = entry->SetSeq().SetAnnot().front();
365         }
366     } else if (entry->IsSet()) {
367         if (!entry->GetSet().IsSetAnnot()
368             || !entry->GetSet().GetAnnot().front()->IsFtable()) {
369             CRef<objects::CSeq_annot> new_annot(new objects::CSeq_annot());
370             entry->SetSet().SetAnnot().push_back(new_annot);
371             annot = new_annot;
372         } else {
373             annot = entry->SetSet().SetAnnot().front();
374         }
375     }
376     annot->SetData().SetFtable().push_back(feat);
377     return annot;
378 }
379 
AddProtFeat(CRef<objects::CSeq_entry> entry)380 CRef<objects::CSeq_feat> AddProtFeat(CRef<objects::CSeq_entry> entry)
381 {
382     CRef<objects::CSeq_feat> feat (new objects::CSeq_feat());
383     feat->SetData().SetProt().SetName().push_back("fake protein name");
384     feat->SetLocation().SetInt().SetId().Assign(*(entry->GetSeq().GetId().front()));
385     feat->SetLocation().SetInt().SetFrom(0);
386     feat->SetLocation().SetInt().SetTo(entry->GetSeq().GetInst().GetLength() - 1);
387     AddFeat (feat, entry);
388     return feat;
389 }
390 
391 
AddGoodSourceFeature(CRef<objects::CSeq_entry> entry)392 CRef<objects::CSeq_feat> AddGoodSourceFeature(CRef<objects::CSeq_entry> entry)
393 {
394     CRef<objects::CSeq_feat> feat(new objects::CSeq_feat());
395     feat->SetData().SetBiosrc().SetOrg().SetTaxname("Trichechus manatus");
396     SetTaxon (feat->SetData().SetBiosrc(), 9778);
397     feat->SetData().SetBiosrc().SetOrg().SetOrgname().SetLineage("some lineage");
398     feat->SetLocation().SetInt().SetId().SetLocal().SetStr("good");
399     feat->SetLocation().SetInt().SetFrom(0);
400     feat->SetLocation().SetInt().SetTo(5);
401     CRef<objects::CSeq_annot> annot(new objects::CSeq_annot());
402     annot->SetData().SetFtable().push_back(feat);
403     AddFeatAnnotToSeqEntry (annot, entry);
404     return feat;
405 }
406 
407 
MakeMiscFeature(CRef<objects::CSeq_id> id,size_t right_end,size_t left_end)408 CRef<objects::CSeq_feat> MakeMiscFeature(CRef<objects::CSeq_id> id, size_t right_end, size_t left_end)
409 {
410     CRef<objects::CSeq_feat> feat(new objects::CSeq_feat());
411     feat->SetLocation().SetInt().SetId().Assign(*id);
412     feat->SetLocation().SetInt().SetFrom(left_end);
413     feat->SetLocation().SetInt().SetTo(right_end);
414     feat->SetData().SetImp().SetKey("misc_feature");
415     return feat;
416 }
417 
418 
BuildGoodFeat()419 CRef<CSeq_feat> BuildGoodFeat ()
420 {
421     CRef<CSeq_feat> feat(new CSeq_feat());
422     feat->SetLocation().SetInt().SetId().SetLocal().SetStr("good");
423     feat->SetLocation().SetInt().SetFrom(0);
424     feat->SetLocation().SetInt().SetTo(59);
425     feat->SetData().SetImp().SetKey("misc_feature");
426 
427     return feat;
428 }
429 
430 
IdFromEntry(CRef<objects::CSeq_entry> entry)431 CRef<objects::CSeq_id> IdFromEntry(CRef<objects::CSeq_entry> entry)
432 {
433     if (entry->IsSeq()) {
434         return entry->SetSeq().SetId().front();
435     } else if (entry->IsSet()) {
436         return IdFromEntry (entry->SetSet().SetSeq_set().front());
437     } else {
438         CRef<objects::CSeq_id> empty;
439         return empty;
440     }
441 }
442 
443 
AddMiscFeature(CRef<objects::CSeq_entry> entry,size_t right_end)444 CRef<objects::CSeq_feat> AddMiscFeature(CRef<objects::CSeq_entry> entry, size_t right_end)
445 {
446     CRef<objects::CSeq_feat> feat = MakeMiscFeature(IdFromEntry(entry), right_end);
447     feat->SetComment("misc_feature needs a comment");
448     AddFeat (feat, entry);
449     return feat;
450 }
451 
452 
AddMiscFeature(CRef<objects::CSeq_entry> entry)453 CRef<objects::CSeq_feat> AddMiscFeature(CRef<objects::CSeq_entry> entry)
454 {
455     return AddMiscFeature (entry, 10);
456 }
457 
458 
SetTaxname(CRef<objects::CSeq_entry> entry,string taxname)459 void SetTaxname (CRef<objects::CSeq_entry> entry, string taxname)
460 {
461     if (!entry) {
462         return;
463     }
464     if (entry->IsSeq()) {
465         NON_CONST_ITERATE (objects::CSeq_descr::Tdata, it, entry->SetSeq().SetDescr().Set()) {
466             if ((*it)->IsSource()) {
467                 if (NStr::IsBlank(taxname)) {
468                     (*it)->SetSource().SetOrg().ResetTaxname();
469                 } else {
470                     (*it)->SetSource().SetOrg().SetTaxname(taxname);
471                 }
472             }
473         }
474     } else if (entry->IsSet()) {
475         NON_CONST_ITERATE (objects::CSeq_descr::Tdata, it, entry->SetSet().SetDescr().Set()) {
476             if ((*it)->IsSource()) {
477                 if (NStr::IsBlank(taxname)) {
478                     (*it)->SetSource().SetOrg().ResetTaxname();
479                 } else {
480                     (*it)->SetSource().SetOrg().SetTaxname(taxname);
481                 }
482             }
483         }
484     }
485 }
486 
487 
SetSebaea_microphylla(CRef<objects::CSeq_entry> entry)488 void SetSebaea_microphylla(CRef<objects::CSeq_entry> entry)
489 {
490     SetTaxname(entry, "Sebaea microphylla");
491     SetTaxon(entry, 0);
492     SetTaxon(entry, 592768);
493 }
494 
495 
SetSynthetic_construct(CRef<objects::CSeq_entry> entry)496 void SetSynthetic_construct(CRef<objects::CSeq_entry> entry)
497 {
498     SetTaxname(entry, "synthetic construct");
499     SetTaxon(entry, 0);
500     SetTaxon(entry, 32630);
501 }
502 
503 
SetDrosophila_melanogaster(CRef<objects::CSeq_entry> entry)504 void SetDrosophila_melanogaster(CRef<objects::CSeq_entry> entry)
505 {
506     SetTaxname(entry, "Drosophila melanogaster");
507     SetTaxon(entry, 0);
508     SetTaxon(entry, 7227);
509 }
510 
SetCommon(CRef<objects::CSeq_entry> entry,string common)511 void SetCommon (CRef<objects::CSeq_entry> entry, string common)
512 {
513     if (!entry) {
514         return;
515     }
516     if (entry->IsSeq()) {
517         NON_CONST_ITERATE (objects::CSeq_descr::Tdata, it, entry->SetSeq().SetDescr().Set()) {
518             if ((*it)->IsSource()) {
519                 if (NStr::IsBlank(common)) {
520                     (*it)->SetSource().SetOrg().ResetCommon();
521                 } else {
522                     (*it)->SetSource().SetOrg().SetCommon(common);
523                 }
524             }
525         }
526     } else if (entry->IsSet()) {
527         NON_CONST_ITERATE (objects::CSeq_descr::Tdata, it, entry->SetSet().SetDescr().Set()) {
528             if ((*it)->IsSource()) {
529                 if (NStr::IsBlank(common)) {
530                     (*it)->SetSource().SetOrg().ResetCommon();
531                 } else {
532                     (*it)->SetSource().SetOrg().SetCommon(common);
533                 }
534             }
535         }
536     }
537 }
538 
539 
SetLineage(CRef<objects::CSeq_entry> entry,string lineage)540 void SetLineage (CRef<objects::CSeq_entry> entry, string lineage)
541 {
542     if (!entry) {
543         return;
544     }
545     if (entry->IsSeq()) {
546         NON_CONST_ITERATE (objects::CSeq_descr::Tdata, it, entry->SetSeq().SetDescr().Set()) {
547             if ((*it)->IsSource()) {
548                 if (NStr::IsBlank(lineage)) {
549                     (*it)->SetSource().SetOrg().SetOrgname().ResetLineage();
550                 } else {
551                     (*it)->SetSource().SetOrg().SetOrgname().SetLineage(lineage);
552                 }
553             }
554         }
555     } else if (entry->IsSet()) {
556         NON_CONST_ITERATE (objects::CSeq_descr::Tdata, it, entry->SetSet().SetDescr().Set()) {
557             if ((*it)->IsSource()) {
558                 if (NStr::IsBlank(lineage)) {
559                     (*it)->SetSource().SetOrg().SetOrgname().ResetLineage();
560                 } else {
561                     (*it)->SetSource().SetOrg().SetOrgname().SetLineage(lineage);
562                 }
563             }
564         }
565     }
566 }
567 
568 
SetDiv(CRef<objects::CSeq_entry> entry,string div)569 void SetDiv (CRef<objects::CSeq_entry> entry, string div)
570 {
571     if (!entry) {
572         return;
573     }
574     if (entry->IsSeq()) {
575         NON_CONST_ITERATE (objects::CSeq_descr::Tdata, it, entry->SetSeq().SetDescr().Set()) {
576             if ((*it)->IsSource()) {
577                 if (NStr::IsBlank(div)) {
578                     (*it)->SetSource().SetOrg().SetOrgname().ResetDiv();
579                 } else {
580                     (*it)->SetSource().SetOrg().SetOrgname().SetDiv(div);
581                 }
582             }
583         }
584     } else if (entry->IsSet()) {
585         NON_CONST_ITERATE (objects::CSeq_descr::Tdata, it, entry->SetSet().SetDescr().Set()) {
586             if ((*it)->IsSource()) {
587                 if (NStr::IsBlank(div)) {
588                     (*it)->SetSource().SetOrg().SetOrgname().ResetDiv();
589                 } else {
590                     (*it)->SetSource().SetOrg().SetOrgname().SetDiv(div);
591                 }
592             }
593         }
594     }
595 }
596 
597 
SetOrigin(CRef<objects::CSeq_entry> entry,objects::CBioSource::TOrigin origin)598 void SetOrigin (CRef<objects::CSeq_entry> entry, objects::CBioSource::TOrigin origin)
599 {
600     if (!entry) {
601         return;
602     }
603     if (entry->IsSeq()) {
604         NON_CONST_ITERATE (objects::CSeq_descr::Tdata, it, entry->SetSeq().SetDescr().Set()) {
605             if ((*it)->IsSource()) {
606                 (*it)->SetSource().SetOrigin(origin);
607             }
608         }
609     } else if (entry->IsSet()) {
610         NON_CONST_ITERATE (objects::CSeq_descr::Tdata, it, entry->SetSet().SetDescr().Set()) {
611             if ((*it)->IsSource()) {
612                 (*it)->SetSource().SetOrigin(origin);
613             }
614         }
615     }
616 }
617 
618 
SetGcode(CRef<objects::CSeq_entry> entry,objects::COrgName::TGcode gcode)619 void SetGcode (CRef<objects::CSeq_entry> entry, objects::COrgName::TGcode gcode)
620 {
621     if (!entry) {
622         return;
623     }
624     if (entry->IsSeq()) {
625         NON_CONST_ITERATE (objects::CSeq_descr::Tdata, it, entry->SetSeq().SetDescr().Set()) {
626             if ((*it)->IsSource()) {
627                 (*it)->SetSource().SetOrg().SetOrgname().SetGcode(gcode);
628             }
629         }
630     } else if (entry->IsSet()) {
631         NON_CONST_ITERATE (objects::CSeq_descr::Tdata, it, entry->SetSet().SetDescr().Set()) {
632             if ((*it)->IsSource()) {
633                 (*it)->SetSource().SetOrg().SetOrgname().SetGcode(gcode);
634             }
635         }
636     }
637 }
638 
639 
SetMGcode(CRef<objects::CSeq_entry> entry,objects::COrgName::TGcode mgcode)640 void SetMGcode (CRef<objects::CSeq_entry> entry, objects::COrgName::TGcode mgcode)
641 {
642     if (!entry) {
643         return;
644     }
645     if (entry->IsSeq()) {
646         NON_CONST_ITERATE (objects::CSeq_descr::Tdata, it, entry->SetSeq().SetDescr().Set()) {
647             if ((*it)->IsSource()) {
648                 (*it)->SetSource().SetOrg().SetOrgname().SetMgcode(mgcode);
649             }
650         }
651     } else if (entry->IsSet()) {
652         NON_CONST_ITERATE (objects::CSeq_descr::Tdata, it, entry->SetSet().SetDescr().Set()) {
653             if ((*it)->IsSource()) {
654                 (*it)->SetSource().SetOrg().SetOrgname().SetMgcode(mgcode);
655             }
656         }
657     }
658 }
659 
660 
SetPGcode(CRef<objects::CSeq_entry> entry,objects::COrgName::TGcode pgcode)661 void SetPGcode (CRef<objects::CSeq_entry> entry, objects::COrgName::TGcode pgcode)
662 {
663     if (!entry) {
664         return;
665     }
666     if (entry->IsSeq()) {
667         NON_CONST_ITERATE (objects::CSeq_descr::Tdata, it, entry->SetSeq().SetDescr().Set()) {
668             if ((*it)->IsSource()) {
669                 (*it)->SetSource().SetOrg().SetOrgname().SetPgcode(pgcode);
670             }
671         }
672     } else if (entry->IsSet()) {
673         NON_CONST_ITERATE (objects::CSeq_descr::Tdata, it, entry->SetSet().SetDescr().Set()) {
674             if ((*it)->IsSource()) {
675                 (*it)->SetSource().SetOrg().SetOrgname().SetPgcode(pgcode);
676             }
677         }
678     }
679 }
680 
681 
ResetOrgname(CRef<objects::CSeq_entry> entry)682 void ResetOrgname (CRef<objects::CSeq_entry> entry)
683 {
684     if (!entry) {
685         return;
686     }
687     if (entry->IsSeq()) {
688         NON_CONST_ITERATE (objects::CSeq_descr::Tdata, it, entry->SetSeq().SetDescr().Set()) {
689             if ((*it)->IsSource()) {
690                 (*it)->SetSource().SetOrg().ResetOrgname();
691             }
692         }
693     } else if (entry->IsSet()) {
694         NON_CONST_ITERATE (objects::CSeq_descr::Tdata, it, entry->SetSet().SetDescr().Set()) {
695             if ((*it)->IsSource()) {
696                 (*it)->SetSource().SetOrg().ResetOrgname();
697             }
698         }
699     }
700 }
701 
702 
SetFocus(CRef<objects::CSeq_entry> entry)703 void SetFocus (CRef<objects::CSeq_entry> entry)
704 {
705     if (!entry) {
706         return;
707     }
708     if (entry->IsSeq()) {
709         NON_CONST_ITERATE (objects::CSeq_descr::Tdata, it, entry->SetSeq().SetDescr().Set()) {
710             if ((*it)->IsSource()) {
711                 (*it)->SetSource().SetIs_focus();
712             }
713         }
714     } else if (entry->IsSet()) {
715         NON_CONST_ITERATE (objects::CSeq_descr::Tdata, it, entry->SetSet().SetDescr().Set()) {
716             if ((*it)->IsSource()) {
717                 (*it)->SetSource().SetIs_focus();
718             }
719         }
720     }
721 }
722 
723 
ClearFocus(CRef<objects::CSeq_entry> entry)724 void ClearFocus (CRef<objects::CSeq_entry> entry)
725 {
726     if (!entry) {
727         return;
728     }
729     if (entry->IsSeq()) {
730         NON_CONST_ITERATE (objects::CSeq_descr::Tdata, it, entry->SetSeq().SetDescr().Set()) {
731             if ((*it)->IsSource()) {
732                 (*it)->SetSource().ResetIs_focus();
733             }
734         }
735     } else if (entry->IsSet()) {
736         NON_CONST_ITERATE (objects::CSeq_descr::Tdata, it, entry->SetSet().SetDescr().Set()) {
737             if ((*it)->IsSource()) {
738                 (*it)->SetSource().ResetIs_focus();
739             }
740         }
741     }
742 }
743 
744 
SetGenome(CRef<objects::CSeq_entry> entry,objects::CBioSource::TGenome genome)745 void SetGenome (CRef<objects::CSeq_entry> entry, objects::CBioSource::TGenome genome)
746 {
747     if (!entry) {
748         return;
749     }
750     if (entry->IsSeq()) {
751         NON_CONST_ITERATE (objects::CSeq_descr::Tdata, it, entry->SetSeq().SetDescr().Set()) {
752             if ((*it)->IsSource()) {
753                 (*it)->SetSource().SetGenome(genome);
754             }
755         }
756     } else if (entry->IsSet()) {
757         NON_CONST_ITERATE (objects::CSeq_descr::Tdata, it, entry->SetSet().SetDescr().Set()) {
758             if ((*it)->IsSource()) {
759                 (*it)->SetSource().SetGenome(genome);
760             }
761         }
762     }
763 }
764 
765 
SetSubSource(objects::CBioSource & src,objects::CSubSource::TSubtype subtype,string val)766 void SetSubSource (objects::CBioSource& src, objects::CSubSource::TSubtype subtype, string val)
767 {
768     if (NStr::IsBlank(val)) {
769         if (src.IsSetSubtype()) {
770             objects::CBioSource::TSubtype::iterator it = src.SetSubtype().begin();
771             while (it != src.SetSubtype().end()) {
772                 if ((*it)->IsSetSubtype() && (*it)->GetSubtype() == subtype) {
773                     it = src.SetSubtype().erase(it);
774                 } else {
775                     ++it;
776                 }
777             }
778         }
779     } else {
780         CRef<objects::CSubSource> sub(new objects::CSubSource(subtype, val));
781         if (NStr::EqualNocase(val, "true")) {
782             sub->SetName("");
783         }
784         src.SetSubtype().push_back(sub);
785     }
786 }
787 
788 
SetSubSource(CRef<objects::CSeq_entry> entry,objects::CSubSource::TSubtype subtype,string val)789 void SetSubSource (CRef<objects::CSeq_entry> entry, objects::CSubSource::TSubtype subtype, string val)
790 {
791     if (!entry) {
792         return;
793     }
794     if (entry->IsSeq()) {
795         NON_CONST_ITERATE (objects::CSeq_descr::Tdata, it, entry->SetSeq().SetDescr().Set()) {
796             if ((*it)->IsSource()) {
797                 SetSubSource((*it)->SetSource(), subtype, val);
798             }
799         }
800     } else if (entry->IsSet()) {
801         NON_CONST_ITERATE (objects::CSeq_descr::Tdata, it, entry->SetSet().SetDescr().Set()) {
802             if ((*it)->IsSource()) {
803                 SetSubSource((*it)->SetSource(), subtype, val);
804             }
805         }
806     }
807 }
808 
809 
SetChromosome(objects::CBioSource & src,string chromosome)810 void SetChromosome (objects::CBioSource& src, string chromosome)
811 {
812     if (NStr::IsBlank(chromosome)) {
813         if (src.IsSetSubtype()) {
814             objects::CBioSource::TSubtype::iterator it = src.SetSubtype().begin();
815             while (it != src.SetSubtype().end()) {
816                 if ((*it)->IsSetSubtype() && (*it)->GetSubtype() == objects::CSubSource::eSubtype_chromosome) {
817                     it = src.SetSubtype().erase(it);
818                 } else {
819                     ++it;
820                 }
821             }
822         }
823     } else {
824         CRef<objects::CSubSource> sub(new objects::CSubSource(objects::CSubSource::eSubtype_chromosome, chromosome));
825         src.SetSubtype().push_back(sub);
826     }
827 }
828 
829 
SetChromosome(CRef<objects::CSeq_entry> entry,string chromosome)830 void SetChromosome (CRef<objects::CSeq_entry> entry, string chromosome)
831 {
832     if (!entry) {
833         return;
834     }
835     if (entry->IsSeq()) {
836         NON_CONST_ITERATE (objects::CSeq_descr::Tdata, it, entry->SetSeq().SetDescr().Set()) {
837             if ((*it)->IsSource()) {
838                 SetChromosome((*it)->SetSource(), chromosome);
839             }
840         }
841     } else if (entry->IsSet()) {
842         NON_CONST_ITERATE (objects::CSeq_descr::Tdata, it, entry->SetSet().SetDescr().Set()) {
843             if ((*it)->IsSource()) {
844                 SetChromosome((*it)->SetSource(), chromosome);
845             }
846         }
847     }
848 }
849 
850 
SetTransgenic(objects::CBioSource & src,bool do_set)851 void SetTransgenic (objects::CBioSource& src, bool do_set)
852 {
853     if (do_set) {
854         CRef<objects::CSubSource> sub(new objects::CSubSource(objects::CSubSource::eSubtype_transgenic, ""));
855         src.SetSubtype().push_back(sub);
856     } else if (src.IsSetSubtype()) {
857         objects::CBioSource::TSubtype::iterator it = src.SetSubtype().begin();
858         while (it != src.SetSubtype().end()) {
859             if ((*it)->IsSetSubtype() && (*it)->GetSubtype() == objects::CSubSource::eSubtype_transgenic) {
860                 it = src.SetSubtype().erase(it);
861             } else {
862                 ++it;
863             }
864         }
865     }
866 }
867 
868 
SetTransgenic(CRef<objects::CSeq_entry> entry,bool do_set)869 void SetTransgenic (CRef<objects::CSeq_entry> entry, bool do_set)
870 {
871     if (!entry) {
872         return;
873     }
874     if (entry->IsSeq()) {
875         NON_CONST_ITERATE (objects::CSeq_descr::Tdata, it, entry->SetSeq().SetDescr().Set()) {
876             if ((*it)->IsSource()) {
877                 SetTransgenic((*it)->SetSource(), do_set);
878             }
879         }
880     } else if (entry->IsSet()) {
881         NON_CONST_ITERATE (objects::CSeq_descr::Tdata, it, entry->SetSet().SetDescr().Set()) {
882             if ((*it)->IsSource()) {
883                 SetTransgenic((*it)->SetSource(), do_set);
884             }
885         }
886     }
887 }
888 
889 
SetOrgMod(objects::CBioSource & src,objects::COrgMod::TSubtype subtype,string val)890 void SetOrgMod (objects::CBioSource& src, objects::COrgMod::TSubtype subtype, string val)
891 {
892     if (NStr::IsBlank(val)) {
893         if (src.IsSetOrg() && src.GetOrg().IsSetOrgname() && src.GetOrg().GetOrgname().IsSetMod()) {
894             objects::COrgName::TMod::iterator it = src.SetOrg().SetOrgname().SetMod().begin();
895             while (it != src.SetOrg().SetOrgname().SetMod().end()) {
896                 if ((*it)->IsSetSubtype() && (*it)->GetSubtype() == subtype) {
897                     it = src.SetOrg().SetOrgname().SetMod().erase(it);
898                 } else {
899                     ++it;
900                 }
901             }
902         }
903     } else {
904         CRef<objects::COrgMod> sub(new objects::COrgMod(subtype, val));
905         src.SetOrg().SetOrgname().SetMod().push_back(sub);
906     }
907 }
908 
909 
SetOrgMod(CRef<objects::CSeq_entry> entry,objects::COrgMod::TSubtype subtype,string val)910 void SetOrgMod (CRef<objects::CSeq_entry> entry, objects::COrgMod::TSubtype subtype, string val)
911 {
912     if (!entry) {
913         return;
914     }
915     if (entry->IsSeq()) {
916         NON_CONST_ITERATE (objects::CSeq_descr::Tdata, it, entry->SetSeq().SetDescr().Set()) {
917             if ((*it)->IsSource()) {
918                 SetOrgMod((*it)->SetSource(), subtype, val);
919             }
920         }
921     } else if (entry->IsSet()) {
922         NON_CONST_ITERATE (objects::CSeq_descr::Tdata, it, entry->SetSet().SetDescr().Set()) {
923             if ((*it)->IsSource()) {
924                 SetOrgMod((*it)->SetSource(), subtype, val);
925             }
926         }
927     }
928 }
929 
930 
BuildGoodAuthor()931 CRef<objects::CAuthor> BuildGoodAuthor()
932 {
933     CRef<objects::CAuthor> author(new objects::CAuthor());
934     author->SetName().SetName().SetLast("Last");
935     author->SetName().SetName().SetFirst("First");
936     author->SetName().SetName().SetMiddle("M");
937     return author;
938 }
939 
940 
BuildGoodArticlePub()941 CRef<objects::CPub> BuildGoodArticlePub()
942 {
943     CRef<objects::CPub> pub(new objects::CPub());
944 
945     CRef<objects::CCit_art::TTitle::C_E> art_title(new objects::CCit_art::TTitle::C_E());
946     art_title->SetName("article title");
947     pub->SetArticle().SetTitle().Set().push_back(art_title);
948     CRef<objects::CCit_jour::TTitle::C_E> journal_title(new objects::CCit_jour::TTitle::C_E());
949     journal_title->SetName("journal_title");
950     pub->SetArticle().SetFrom().SetJournal().SetTitle().Set().push_back(journal_title);
951     CRef<objects::CCit_jour::TTitle::C_E> iso_jta(new objects::CCit_jour::TTitle::C_E());
952     iso_jta->SetIso_jta("abbr");
953     pub->SetArticle().SetFrom().SetJournal().SetTitle().Set().push_back(iso_jta);
954     pub->SetArticle().SetAuthors().SetNames().SetStd().push_back(BuildGoodAuthor());
955     pub->SetArticle().SetFrom().SetJournal().SetImp().SetVolume("vol 1");
956     pub->SetArticle().SetFrom().SetJournal().SetImp().SetPages("14-32");
957     pub->SetArticle().SetFrom().SetJournal().SetImp().SetDate().SetStd().SetYear(2009);
958     return pub;
959 }
960 
961 
BuildGoodCitGenPub(CRef<objects::CAuthor> author,int serial_number)962 CRef<objects::CPub> BuildGoodCitGenPub(CRef<objects::CAuthor> author, int serial_number)
963 {
964     CRef<objects::CPub> pub(new objects::CPub());
965     if (!author) {
966         author = BuildGoodAuthor();
967     }
968     pub->SetGen().SetAuthors().SetNames().SetStd().push_back(author);
969     pub->SetGen().SetTitle("gen title");
970     pub->SetGen().SetDate().SetStd().SetYear(2009);
971     if (serial_number > -1) {
972         pub->SetGen().SetSerial_number(serial_number);
973     }
974     return pub;
975 }
976 
977 
BuildGoodCitSubPub()978 CRef<objects::CPub> BuildGoodCitSubPub()
979 {
980     CRef<objects::CPub> pub(new objects::CPub());
981     CRef<objects::CAuthor> author = BuildGoodAuthor();
982     pub->SetSub().SetAuthors().SetNames().SetStd().push_back(author);
983     pub->SetSub().SetAuthors().SetAffil().SetStd().SetAffil("A Major University");
984     pub->SetSub().SetAuthors().SetAffil().SetStd().SetSub("Maryland");
985     pub->SetSub().SetAuthors().SetAffil().SetStd().SetCountry("USA");
986     pub->SetSub().SetDate().SetStd().SetYear(2009);
987     return pub;
988 }
989 
990 
MakeSeqLong(objects::CBioseq & seq)991 void MakeSeqLong(objects::CBioseq& seq)
992 {
993     if (seq.SetInst().IsSetSeq_data()) {
994         if (seq.GetInst().GetSeq_data().IsIupacna()) {
995             seq.SetInst().SetSeq_data().SetIupacna().Set().clear();
996             for (int i = 0; i < 100; i++) {
997                 seq.SetInst().SetSeq_data().SetIupacna().Set().append(
998                     "AAAAATTTTTGGGGGCCCCCTTTTTAAAAATTTTTGGGGGCCCCCTTTTTAAAAATTTTTGGGGGCCCCCTTTTTAAAAATTTTTGGGGGCCCCCTTTTT");
999             }
1000             seq.SetInst().SetLength(10000);
1001         } else if (seq.GetInst().GetSeq_data().IsIupacaa()) {
1002             seq.SetInst().SetSeq_data().SetIupacaa().Set().clear();
1003             for (int i = 0; i < 100; i++) {
1004                 seq.SetInst().SetSeq_data().SetIupacaa().Set().append(
1005                     "MPRKTEINSLMPRKTEINSLMPRKTEINSLMPRKTEINSLMPRKTEINSLMPRKTEINSLMPRKTEINSLMPRKTEINSLMPRKTEINSLMPRKTEINSL");
1006             }
1007             seq.SetInst().SetLength(10000);
1008         }
1009     }
1010 }
1011 
1012 
SetBiomol(CRef<objects::CSeq_entry> entry,objects::CMolInfo::TBiomol biomol)1013 void SetBiomol (CRef<objects::CSeq_entry> entry, objects::CMolInfo::TBiomol biomol)
1014 {
1015     bool found = false;
1016 
1017     NON_CONST_ITERATE (objects::CSeq_descr::Tdata, it, entry->SetSeq().SetDescr().Set()) {
1018         if ((*it)->IsMolinfo()) {
1019             (*it)->SetMolinfo().SetBiomol(biomol);
1020             found = true;
1021         }
1022     }
1023     if (!found) {
1024         CRef<objects::CSeqdesc> mdesc(new objects::CSeqdesc());
1025         mdesc->SetMolinfo().SetBiomol(biomol);
1026         entry->SetSeq().SetDescr().Set().push_back(mdesc);
1027     }
1028 }
1029 
1030 
SetTech(CRef<objects::CSeq_entry> entry,objects::CMolInfo::TTech tech)1031 void SetTech (CRef<objects::CSeq_entry> entry, objects::CMolInfo::TTech tech)
1032 {
1033     bool found = false;
1034 
1035     NON_CONST_ITERATE (objects::CSeq_descr::Tdata, it, entry->SetSeq().SetDescr().Set()) {
1036         if ((*it)->IsMolinfo()) {
1037             (*it)->SetMolinfo().SetTech(tech);
1038             found = true;
1039         }
1040     }
1041     if (!found) {
1042         CRef<objects::CSeqdesc> mdesc(new objects::CSeqdesc());
1043         mdesc->SetMolinfo().SetTech(tech);
1044         entry->SetSeq().SetDescr().Set().push_back(mdesc);
1045     }
1046 }
1047 
1048 
SetCompleteness(CRef<objects::CSeq_entry> entry,objects::CMolInfo::TCompleteness completeness)1049 void SetCompleteness(CRef<objects::CSeq_entry> entry, objects::CMolInfo::TCompleteness completeness)
1050 {
1051     if (entry->IsSeq()) {
1052         bool found = false;
1053         NON_CONST_ITERATE (objects::CSeq_descr::Tdata, it, entry->SetSeq().SetDescr().Set()) {
1054             if ((*it)->IsMolinfo()) {
1055                 (*it)->SetMolinfo().SetCompleteness (completeness);
1056                 found = true;
1057             }
1058         }
1059         if (!found) {
1060             CRef<objects::CSeqdesc> mdesc(new objects::CSeqdesc());
1061             if (entry->GetSeq().IsAa()) {
1062                 mdesc->SetMolinfo().SetBiomol(objects::CMolInfo::eBiomol_peptide);
1063             } else {
1064                 mdesc->SetMolinfo().SetBiomol(objects::CMolInfo::eBiomol_genomic);
1065             }
1066             mdesc->SetMolinfo().SetCompleteness (completeness);
1067             entry->SetSeq().SetDescr().Set().push_back(mdesc);
1068         }
1069     }
1070 }
1071 
1072 
BuildGoodProtSeq(void)1073 CRef<objects::CSeq_entry> BuildGoodProtSeq(void)
1074 {
1075     CRef<objects::CSeq_entry> entry = BuildGoodSeq();
1076 
1077     entry->SetSeq().SetInst().SetMol(objects::CSeq_inst::eMol_aa);
1078     entry->SetSeq().SetInst().SetSeq_data().SetIupacaa().Set("PRKTEIN");
1079     entry->SetSeq().SetInst().SetLength(7);
1080     NON_CONST_ITERATE (objects::CSeq_descr::Tdata, it, entry->SetSeq().SetDescr().Set()) {
1081         if ((*it)->IsMolinfo()) {
1082             (*it)->SetMolinfo().SetBiomol(objects::CMolInfo::eBiomol_peptide);
1083         }
1084     }
1085 
1086     AddProtFeat (entry);
1087 
1088     return entry;
1089 }
1090 
1091 
MakeProteinForGoodNucProtSet(string id)1092 CRef<objects::CSeq_entry> MakeProteinForGoodNucProtSet (string id)
1093 {
1094     // make protein
1095     CRef<objects::CBioseq> pseq(new objects::CBioseq());
1096     pseq->SetInst().SetMol(objects::CSeq_inst::eMol_aa);
1097     pseq->SetInst().SetRepr(objects::CSeq_inst::eRepr_raw);
1098     pseq->SetInst().SetSeq_data().SetIupacaa().Set("MPRKTEIN");
1099     pseq->SetInst().SetLength(8);
1100 
1101     CRef<objects::CSeq_id> pid(new objects::CSeq_id());
1102     pid->SetLocal().SetStr (id);
1103     pseq->SetId().push_back(pid);
1104 
1105     CRef<objects::CSeqdesc> mpdesc(new objects::CSeqdesc());
1106     mpdesc->SetMolinfo().SetBiomol(objects::CMolInfo::eBiomol_peptide);
1107     mpdesc->SetMolinfo().SetCompleteness(objects::CMolInfo::eCompleteness_complete);
1108     pseq->SetDescr().Set().push_back(mpdesc);
1109 
1110     CRef<objects::CSeq_entry> pentry(new objects::CSeq_entry());
1111     pentry->SetSeq(*pseq);
1112 
1113     CRef<objects::CSeq_feat> feat (new objects::CSeq_feat());
1114     feat->SetData().SetProt().SetName().push_back("fake protein name");
1115     feat->SetLocation().SetInt().SetId().SetLocal().SetStr(id);
1116     feat->SetLocation().SetInt().SetFrom(0);
1117     feat->SetLocation().SetInt().SetTo(7);
1118     AddFeat (feat, pentry);
1119 
1120     return pentry;
1121 }
1122 
1123 
MakeCDSForGoodNucProtSet(const string & nuc_id,const string & prot_id)1124 CRef<objects::CSeq_feat> MakeCDSForGoodNucProtSet (const string& nuc_id, const string& prot_id)
1125 {
1126     CRef<objects::CSeq_feat> cds (new objects::CSeq_feat());
1127     cds->SetData().SetCdregion();
1128     cds->SetProduct().SetWhole().SetLocal().SetStr(prot_id);
1129     cds->SetLocation().SetInt().SetId().SetLocal().SetStr(nuc_id);
1130     cds->SetLocation().SetInt().SetFrom(0);
1131     cds->SetLocation().SetInt().SetTo(26);
1132     return cds;
1133 }
1134 
1135 
BuildGoodNucProtSet(void)1136 CRef<objects::CSeq_entry> BuildGoodNucProtSet(void)
1137 {
1138     CRef<objects::CBioseq_set> set(new objects::CBioseq_set());
1139     set->SetClass(objects::CBioseq_set::eClass_nuc_prot);
1140 
1141     // make nucleotide
1142     CRef<objects::CBioseq> nseq(new objects::CBioseq());
1143     nseq->SetInst().SetMol(objects::CSeq_inst::eMol_dna);
1144     nseq->SetInst().SetRepr(objects::CSeq_inst::eRepr_raw);
1145     nseq->SetInst().SetSeq_data().SetIupacna().Set("ATGCCCAGAAAAACAGAGATAAACTAAGGGATGCCCAGAAAAACAGAGATAAACTAAGGG");
1146     nseq->SetInst().SetLength(60);
1147 
1148     CRef<objects::CSeq_id> id(new objects::CSeq_id());
1149     id->SetLocal().SetStr ("nuc");
1150     nseq->SetId().push_back(id);
1151 
1152     CRef<objects::CSeqdesc> mdesc(new objects::CSeqdesc());
1153     mdesc->SetMolinfo().SetBiomol(objects::CMolInfo::eBiomol_genomic);
1154     nseq->SetDescr().Set().push_back(mdesc);
1155 
1156     CRef<objects::CSeq_entry> nentry(new objects::CSeq_entry());
1157     nentry->SetSeq(*nseq);
1158 
1159     set->SetSeq_set().push_back(nentry);
1160 
1161     // make protein
1162     CRef<objects::CSeq_entry> pentry = MakeProteinForGoodNucProtSet("prot");
1163 
1164     set->SetSeq_set().push_back(pentry);
1165 
1166     CRef<objects::CSeq_entry> set_entry(new objects::CSeq_entry());
1167     set_entry->SetSet(*set);
1168 
1169     CRef<objects::CSeq_feat> cds = MakeCDSForGoodNucProtSet("nuc", "prot");
1170     AddFeat (cds, set_entry);
1171 
1172     AddGoodSource (set_entry);
1173     AddGoodPub(set_entry);
1174     return set_entry;
1175 }
1176 
1177 
AdjustProtFeatForNucProtSet(CRef<objects::CSeq_entry> entry)1178 void AdjustProtFeatForNucProtSet(CRef<objects::CSeq_entry> entry)
1179 {
1180     CRef<objects::CSeq_feat> prot;
1181     CRef<objects::CSeq_entry> prot_seq;
1182 
1183     if (!entry) {
1184         return;
1185     }
1186     if (entry->IsSeq()) {
1187         prot_seq = entry;
1188         prot = entry->SetSeq().SetAnnot().front()->SetData().SetFtable().front();
1189     } else if (entry->IsSet()) {
1190         prot_seq = entry->SetSet().SetSeq_set().back();
1191         prot = prot_seq->SetSeq().SetAnnot().front()->SetData().SetFtable().front();
1192     }
1193     if (prot && prot_seq) {
1194         prot->SetLocation().SetInt().SetTo(prot_seq->SetSeq().SetInst().SetLength() - 1);
1195     }
1196 }
1197 
1198 
SetNucProtSetProductName(CRef<objects::CSeq_entry> entry,string new_name)1199 void SetNucProtSetProductName (CRef<objects::CSeq_entry> entry, string new_name)
1200 {
1201     CRef<objects::CSeq_feat> prot;
1202     CRef<objects::CSeq_entry> prot_seq;
1203 
1204     if (!entry) {
1205         return;
1206     }
1207     if (entry->IsSeq()) {
1208         prot_seq = entry;
1209         prot = entry->SetSeq().SetAnnot().front()->SetData().SetFtable().front();
1210     } else if (entry->IsSet()) {
1211         prot_seq = entry->SetSet().SetSeq_set().back();
1212         prot = prot_seq->SetSeq().SetAnnot().front()->SetData().SetFtable().front();
1213     }
1214     if (prot) {
1215         if (prot->SetData().SetProt().SetName().size() > 0) {
1216             prot->SetData().SetProt().SetName().pop_front();
1217         }
1218         prot->SetData().SetProt().SetName().push_front(new_name);
1219     }
1220 }
1221 
1222 
GetCDSFromGoodNucProtSet(CRef<objects::CSeq_entry> entry)1223 CRef<objects::CSeq_feat> GetCDSFromGoodNucProtSet (CRef<objects::CSeq_entry> entry)
1224 {
1225     return entry->SetSet().SetAnnot().front()->SetData().SetFtable().front();
1226 }
1227 
1228 
GetNucleotideSequenceFromGoodNucProtSet(CRef<objects::CSeq_entry> entry)1229 CRef<objects::CSeq_entry> GetNucleotideSequenceFromGoodNucProtSet (CRef<objects::CSeq_entry> entry)
1230 {
1231     return entry->SetSet().SetSeq_set().front();
1232 }
1233 
1234 
GetProteinSequenceFromGoodNucProtSet(CRef<objects::CSeq_entry> entry)1235 CRef<objects::CSeq_entry> GetProteinSequenceFromGoodNucProtSet (CRef<objects::CSeq_entry> entry)
1236 {
1237     return entry->SetSet().SetSeq_set().back();
1238 }
1239 
1240 
GetProtFeatFromGoodNucProtSet(CRef<objects::CSeq_entry> entry)1241 CRef<objects::CSeq_feat> GetProtFeatFromGoodNucProtSet (CRef<objects::CSeq_entry> entry)
1242 {
1243     CRef<objects::CSeq_entry> pentry = GetProteinSequenceFromGoodNucProtSet(entry);
1244     return pentry->SetSeq().SetAnnot().front()->SetData().SetFtable().front();
1245 }
1246 
1247 
RetranslateCdsForNucProtSet(CRef<objects::CSeq_entry> entry,objects::CScope & scope)1248 void RetranslateCdsForNucProtSet (CRef<objects::CSeq_entry> entry, objects::CScope &scope)
1249 {
1250     CRef<objects::CSeq_feat> cds = GetCDSFromGoodNucProtSet(entry);
1251     CRef<objects::CBioseq> bioseq = CSeqTranslator::TranslateToProtein(*cds, scope);
1252     CRef<objects::CSeq_entry> pentry = GetProteinSequenceFromGoodNucProtSet(entry);
1253     pentry->SetSeq().SetInst().Assign(bioseq->GetInst());
1254     AdjustProtFeatForNucProtSet (entry);
1255 }
1256 
1257 
SetProteinPartial(CRef<CSeq_entry> pentry,bool partial5,bool partial3)1258 void SetProteinPartial(CRef<CSeq_entry> pentry, bool partial5, bool partial3)
1259 {
1260     CRef<CSeq_feat> prot = pentry->SetAnnot().front()->SetData().SetFtable().front();
1261     prot->SetPartial(partial5 || partial3);
1262     prot->SetLocation().SetPartialStart(partial5, objects::eExtreme_Biological);
1263     prot->SetLocation().SetPartialStop(partial3, objects::eExtreme_Biological);
1264 
1265     // molinfo completeness
1266     if (partial5 && partial3) {
1267         SetCompleteness (pentry, objects::CMolInfo::eCompleteness_no_ends);
1268     } else if (partial5) {
1269         SetCompleteness (pentry, objects::CMolInfo::eCompleteness_no_left);
1270     } else if (partial3) {
1271         SetCompleteness (pentry, objects::CMolInfo::eCompleteness_no_right);
1272     } else {
1273         SetCompleteness (pentry, objects::CMolInfo::eCompleteness_complete);
1274     }
1275 }
1276 
1277 
SetNucProtSetPartials(CRef<objects::CSeq_entry> entry,bool partial5,bool partial3)1278 void SetNucProtSetPartials (CRef<objects::CSeq_entry> entry, bool partial5, bool partial3)
1279 {
1280     // partials for CDS
1281     CRef<objects::CSeq_feat> cds = GetCDSFromGoodNucProtSet(entry);
1282     cds->SetPartial(partial5 || partial3);
1283     cds->SetLocation().SetPartialStart(partial5, objects::eExtreme_Biological);
1284     cds->SetLocation().SetPartialStop(partial3, objects::eExtreme_Biological);
1285 
1286     CRef<objects::CSeq_entry> pentry = GetProteinSequenceFromGoodNucProtSet(entry);
1287     SetProteinPartial(pentry, partial5, partial3);
1288 }
1289 
1290 
ChangeNucProtSetProteinId(CRef<objects::CSeq_entry> entry,CRef<objects::CSeq_id> id)1291 void ChangeNucProtSetProteinId (CRef<objects::CSeq_entry> entry, CRef<objects::CSeq_id> id)
1292 {
1293     CRef<objects::CSeq_entry> pseq = GetProteinSequenceFromGoodNucProtSet(entry);
1294     pseq->SetSeq().SetId().front()->Assign(*id);
1295 
1296     CRef<objects::CSeq_feat> pfeat = GetProtFeatFromGoodNucProtSet(entry);
1297     pfeat->SetLocation().SetInt().SetId().Assign(*id);
1298 
1299     CRef<objects::CSeq_feat> cds = GetCDSFromGoodNucProtSet(entry);
1300     cds->SetProduct().SetWhole().Assign(*id);
1301 }
1302 
1303 
ChangeNucProtSetNucId(CRef<objects::CSeq_entry> entry,CRef<objects::CSeq_id> id)1304 void ChangeNucProtSetNucId (CRef<objects::CSeq_entry> entry, CRef<objects::CSeq_id> id)
1305 {
1306     CRef<objects::CSeq_entry> nseq = GetNucleotideSequenceFromGoodNucProtSet(entry);
1307     nseq->SetSeq().SetId().front()->Assign(*id);
1308 
1309     CRef<objects::CSeq_feat> cds = GetCDSFromGoodNucProtSet(entry);
1310     if(cds->GetLocation().IsInt()) {
1311         cds->SetLocation().SetInt().SetId().Assign(*id);
1312     } else if (cds->GetLocation().IsMix()) {
1313         cds->SetLocation().SetMix().Set().front()->SetInt().SetId().Assign(*id);
1314         cds->SetLocation().SetMix().Set().back()->SetInt().SetId().Assign(*id);
1315     }
1316 }
1317 
1318 
MakeNucProtSet3Partial(CRef<objects::CSeq_entry> entry)1319 void MakeNucProtSet3Partial (CRef<objects::CSeq_entry> entry)
1320 {
1321     CRef<objects::CSeq_feat> cds = GetCDSFromGoodNucProtSet(entry);
1322     cds->SetLocation().SetInt().SetTo(59);
1323     cds->SetLocation().SetPartialStop(true, objects::eExtreme_Biological);
1324     cds->SetPartial(true);
1325     CRef<objects::CSeq_entry> nuc_seq = entry->SetSet().SetSeq_set().front();
1326     nuc_seq->SetSeq().SetInst().SetSeq_data().SetIupacna().Set("ATGCCCAGAAAAACAGAGATAAACAAAGGGATGCCCAGAAAAACAGAGATAAACAAAGGG");
1327     CRef<objects::CSeq_entry> prot_seq = entry->SetSet().SetSeq_set().back();
1328     prot_seq->SetSeq().SetInst().SetSeq_data().SetIupacaa().Set("MPRKTEINKGMPRKTEINKG");
1329     prot_seq->SetSeq().SetInst().SetLength(20);
1330     SetCompleteness (prot_seq, objects::CMolInfo::eCompleteness_no_right);
1331     CRef<objects::CSeq_feat> prot = prot_seq->SetSeq().SetAnnot().front()->SetData().SetFtable().front();
1332     prot->SetLocation().SetInt().SetTo(19);
1333     prot->SetLocation().SetPartialStop(true, objects::eExtreme_Biological);
1334     prot->SetPartial(true);
1335 
1336 }
1337 
1338 
ChangeId(CRef<objects::CSeq_annot> annot,CRef<objects::CSeq_id> id)1339 void ChangeId(CRef<objects::CSeq_annot> annot, CRef<objects::CSeq_id> id)
1340 {
1341     if (annot && annot->IsFtable()) {
1342         objects::CSeq_annot::C_Data::TFtable::iterator it = annot->SetData().SetFtable().begin();
1343         while (it != annot->SetData().SetFtable().end()) {
1344             (*it)->SetLocation().SetInt().SetId().Assign(*id);
1345             ++it;
1346         }
1347     }
1348 }
1349 
1350 
ChangeProductId(CRef<objects::CSeq_annot> annot,CRef<objects::CSeq_id> id)1351 void ChangeProductId(CRef<objects::CSeq_annot> annot, CRef<objects::CSeq_id> id)
1352 {
1353     if (annot && annot->IsFtable()) {
1354         objects::CSeq_annot::C_Data::TFtable::iterator it = annot->SetData().SetFtable().begin();
1355         while (it != annot->SetData().SetFtable().end()) {
1356             if ((*it)->IsSetProduct()) {
1357                 (*it)->SetProduct().SetWhole().Assign(*id);
1358             }
1359             ++it;
1360         }
1361     }
1362 }
1363 
1364 
ChangeNucId(CRef<objects::CSeq_entry> np_set,CRef<objects::CSeq_id> id)1365 void ChangeNucId(CRef<objects::CSeq_entry> np_set, CRef<objects::CSeq_id> id)
1366 {
1367     if (!np_set || !np_set->IsSet()) {
1368         return;
1369     }
1370 
1371     CRef<objects::CSeq_entry> nuc_entry = np_set->SetSet().SetSeq_set().front();
1372 
1373     nuc_entry->SetSeq().SetId().front()->Assign(*id);
1374     if (nuc_entry->SetSeq().IsSetAnnot()) {
1375         NON_CONST_ITERATE(objects::CSeq_entry::TAnnot, annot_it, nuc_entry->SetSeq().SetAnnot()) {
1376             ChangeId (*annot_it, id);
1377         }
1378     }
1379     if (np_set->SetSet().IsSetAnnot()) {
1380         NON_CONST_ITERATE(objects::CSeq_entry::TAnnot, annot_it, np_set->SetSet().SetAnnot()) {
1381             ChangeId (*annot_it, id);
1382         }
1383     }
1384 }
1385 
1386 
ChangeProtId(CRef<objects::CSeq_entry> np_set,CRef<objects::CSeq_id> id)1387 void ChangeProtId(CRef<objects::CSeq_entry> np_set, CRef<objects::CSeq_id> id)
1388 {
1389     if (!np_set || !np_set->IsSet()) {
1390         return;
1391     }
1392 
1393     CRef<objects::CSeq_entry> prot_entry = np_set->SetSet().SetSeq_set().back();
1394     CRef<objects::CSeq_feat> cds = GetCDSFromGoodNucProtSet(np_set);
1395 
1396     prot_entry->SetSeq().SetId().front()->Assign(*id);
1397     EDIT_EACH_SEQANNOT_ON_BIOSEQ (annot_it, prot_entry->SetSeq()) {
1398         ChangeId (*annot_it, id);
1399     }
1400 
1401     EDIT_EACH_SEQANNOT_ON_SEQSET (annot_it, np_set->SetSet()) {
1402         ChangeProductId (*annot_it, id);
1403     }
1404 }
1405 
1406 
BuildRefSeqId(void)1407 CRef<objects::CSeq_id> BuildRefSeqId(void)
1408 {
1409     CRef<objects::CSeq_id> id(new objects::CSeq_id());
1410     id->SetOther().SetAccession("NC_123456");
1411     return id;
1412 }
1413 
1414 
ChangeId(CRef<objects::CSeq_entry> entry,CRef<objects::CSeq_id> id)1415 void ChangeId(CRef<objects::CSeq_entry> entry, CRef<objects::CSeq_id> id)
1416 {
1417     if (entry->IsSeq()) {
1418         entry->SetSeq().SetId().front()->Assign(*id);
1419         if (entry->SetSeq().IsSetAnnot()) {
1420             objects::CBioseq::TAnnot::iterator annot_it = entry->SetSeq().SetAnnot().begin();
1421             while (annot_it != entry->SetSeq().SetAnnot().end()) {
1422                 if ((*annot_it)->IsFtable()) {
1423                     objects::CSeq_annot::C_Data::TFtable::iterator it = (*annot_it)->SetData().SetFtable().begin();
1424                     while (it != (*annot_it)->SetData().SetFtable().end()) {
1425                         (*it)->SetLocation().SetId(*id);
1426                         ++it;
1427                     }
1428                 }
1429                 ++annot_it;
1430             }
1431         }
1432     }
1433 }
1434 
1435 
ChangeId(CRef<objects::CSeq_annot> annot,string suffix)1436 void ChangeId(CRef<objects::CSeq_annot> annot, string suffix)
1437 {
1438     if (annot && annot->IsFtable()) {
1439         objects::CSeq_annot::C_Data::TFtable::iterator it = annot->SetData().SetFtable().begin();
1440         while (it != annot->SetData().SetFtable().end()) {
1441             (*it)->SetLocation().SetInt().SetId().SetLocal().SetStr().append(suffix);
1442             if ((*it)->IsSetProduct()) {
1443                 (*it)->SetProduct().SetWhole().SetLocal().SetStr().append(suffix);
1444             }
1445             ++it;
1446         }
1447     }
1448 }
1449 
1450 
ChangeId(CRef<objects::CSeq_entry> entry,string suffix)1451 void ChangeId(CRef<objects::CSeq_entry> entry, string suffix)
1452 {
1453     if (entry->IsSeq()) {
1454         entry->SetSeq().SetId().front()->SetLocal().SetStr().append(suffix);
1455         if (entry->SetSeq().IsSetAnnot()) {
1456             objects::CBioseq::TAnnot::iterator annot_it = entry->SetSeq().SetAnnot().begin();
1457             while (annot_it != entry->SetSeq().SetAnnot().end()) {
1458                 ChangeId(*annot_it, suffix);
1459                 ++annot_it;
1460             }
1461         }
1462     } else if (entry->IsSet()) {
1463         objects::CBioseq_set::TSeq_set::iterator it = entry->SetSet().SetSeq_set().begin();
1464         while (it != entry->SetSet().SetSeq_set().end()) {
1465             ChangeId(*it, suffix);
1466             ++it;
1467         }
1468         if (entry->SetSet().IsSetAnnot()) {
1469             objects::CBioseq_set::TAnnot::iterator annot_it = entry->SetSet().SetAnnot().begin();
1470             while (annot_it != entry->SetSet().SetAnnot().end()) {
1471                 ChangeId(*annot_it, suffix);
1472                 ++annot_it;
1473             }
1474         }
1475     }
1476 }
1477 
1478 
BuildGenProdSetNucProtSet(CRef<objects::CSeq_id> nuc_id,CRef<objects::CSeq_id> prot_id)1479 CRef<objects::CSeq_entry> BuildGenProdSetNucProtSet (CRef<objects::CSeq_id> nuc_id, CRef<objects::CSeq_id> prot_id)
1480 {
1481     CRef<objects::CSeq_entry> np = BuildGoodNucProtSet();
1482     CRef<objects::CSeq_entry> nuc = GetNucleotideSequenceFromGoodNucProtSet(np);
1483     nuc->SetSeq().SetInst().SetSeq_data().SetIupacna().Set("ATGCCCAGAAAAACAGAGATAAACTAA");
1484     nuc->SetSeq().SetInst().SetLength(27);
1485     nuc->SetSeq().SetInst().SetMol(objects::CSeq_inst::eMol_rna);
1486     SetBiomol(nuc, objects::CMolInfo::eBiomol_mRNA);
1487     if (nuc_id) {
1488         ChangeNucProtSetNucId(np, nuc_id);
1489     }
1490     if (prot_id) {
1491         ChangeNucProtSetProteinId(np, prot_id);
1492     }
1493     return np;
1494 }
1495 
1496 
MakemRNAForCDS(CRef<objects::CSeq_feat> feat)1497 CRef<objects::CSeq_feat> MakemRNAForCDS (CRef<objects::CSeq_feat> feat)
1498 {
1499     CRef<objects::CSeq_feat> mrna(new objects::CSeq_feat);
1500     mrna->SetData().SetRna().SetType(objects::CRNA_ref::eType_mRNA);
1501     mrna->SetLocation().Assign(feat->GetLocation());
1502     return mrna;
1503 }
1504 
1505 
BuildGoodGenProdSet()1506 CRef<objects::CSeq_entry> BuildGoodGenProdSet()
1507 {
1508     CRef<objects::CSeq_entry> entry(new objects::CSeq_entry());
1509     entry->SetSet().SetClass(objects::CBioseq_set::eClass_gen_prod_set);
1510     CRef<objects::CSeq_entry> contig = BuildGoodSeq();
1511     contig->SetSeq().SetInst().SetSeq_data().SetIupacna().Set("ATGCCCAGAAAAACAGAGATAAACTAAGGGATGCCCAGAAAAACAGAGATAAACTAAGGG");
1512     contig->SetSeq().SetInst().SetLength(60);
1513     entry->SetSet().SetSeq_set().push_back (contig);
1514     CRef<objects::CSeq_id> nuc_id(new objects::CSeq_id());
1515     nuc_id->SetLocal().SetStr("nuc");
1516     CRef<objects::CSeq_id> prot_id(new objects::CSeq_id());
1517     prot_id->SetLocal().SetStr("prot");
1518     CRef<objects::CSeq_entry> np = BuildGenProdSetNucProtSet(nuc_id, prot_id);
1519     entry->SetSet().SetSeq_set().push_back (np);
1520 
1521     CRef<objects::CSeq_feat> cds(new objects::CSeq_feat());
1522     cds->Assign (*(GetCDSFromGoodNucProtSet(np)));
1523     cds->SetLocation().SetInt().SetId().SetLocal().SetStr("good");
1524     AddFeat (cds, contig);
1525     CRef<objects::CSeq_feat> mrna = MakemRNAForCDS(cds);
1526     mrna->SetProduct().SetWhole().Assign(*nuc_id);
1527     AddFeat (mrna, contig);
1528 
1529     return entry;
1530 }
1531 
1532 
GetGenomicFromGenProdSet(CRef<objects::CSeq_entry> entry)1533 CRef<objects::CSeq_entry> GetGenomicFromGenProdSet (CRef<objects::CSeq_entry> entry)
1534 {
1535     return entry->SetSet().SetSeq_set().front();
1536 }
1537 
1538 
GetmRNAFromGenProdSet(CRef<objects::CSeq_entry> entry)1539 CRef<objects::CSeq_feat> GetmRNAFromGenProdSet(CRef<objects::CSeq_entry> entry)
1540 {
1541     CRef<objects::CSeq_entry> genomic = GetGenomicFromGenProdSet(entry);
1542     CRef<objects::CSeq_feat> mrna = genomic->SetSeq().SetAnnot().front()->SetData().SetFtable().back();
1543     return mrna;
1544 }
1545 
1546 
GetNucProtSetFromGenProdSet(CRef<objects::CSeq_entry> entry)1547 CRef<objects::CSeq_entry> GetNucProtSetFromGenProdSet(CRef<objects::CSeq_entry> entry)
1548 {
1549     return entry->SetSet().SetSeq_set().back();
1550 }
1551 
1552 
GetCDSFromGenProdSet(CRef<objects::CSeq_entry> entry)1553 CRef<objects::CSeq_feat> GetCDSFromGenProdSet (CRef<objects::CSeq_entry> entry)
1554 {
1555     CRef<objects::CSeq_entry> genomic = GetGenomicFromGenProdSet(entry);
1556     CRef<objects::CSeq_feat> cds = genomic->SetSeq().SetAnnot().front()->SetData().SetFtable().front();
1557     return cds;
1558 }
1559 
1560 
RevComp(objects::CBioseq & bioseq)1561 void RevComp (objects::CBioseq& bioseq)
1562 {
1563     if (!bioseq.IsNa() || !bioseq.IsSetInst()
1564         || !bioseq.GetInst().IsSetSeq_data()
1565         || !bioseq.GetInst().GetSeq_data().IsIupacna()) {
1566         return;
1567     }
1568     string seq = bioseq.GetInst().GetSeq_data().GetIupacna().Get();
1569     string new_seq = "";
1570     string::iterator sit = seq.end();
1571     while (sit != seq.begin()) {
1572         --sit;
1573         string new_ch = "";
1574         new_ch += *sit;
1575         if (NStr::Equal(new_ch, "A")) {
1576             new_ch = "T";
1577         } else if (NStr::Equal(new_ch, "T")) {
1578             new_ch = "A";
1579         } else if (NStr::Equal(new_ch, "G")) {
1580             new_ch = "C";
1581         } else if (NStr::Equal(new_ch, "C")) {
1582             new_ch = "G";
1583         }
1584         new_seq.append(new_ch);
1585     }
1586 
1587     bioseq.SetInst().SetSeq_data().SetIupacna().Set(new_seq);
1588     size_t len = bioseq.GetLength();
1589     if (bioseq.IsSetAnnot()) {
1590         EDIT_EACH_SEQFEAT_ON_SEQANNOT (feat_it, *(bioseq.SetAnnot().front())) {
1591             TSeqPos new_from = len - (*feat_it)->GetLocation().GetInt().GetTo() - 1;
1592             TSeqPos new_to = len - (*feat_it)->GetLocation().GetInt().GetFrom() - 1;
1593             (*feat_it)->SetLocation().SetInt().SetFrom(new_from);
1594             (*feat_it)->SetLocation().SetInt().SetTo(new_to);
1595             if ((*feat_it)->GetLocation().GetInt().IsSetStrand()
1596                 && (*feat_it)->GetLocation().GetInt().GetStrand() == objects::eNa_strand_minus) {
1597                 (*feat_it)->SetLocation().SetInt().SetStrand(objects::eNa_strand_plus);
1598             } else {
1599                 (*feat_it)->SetLocation().SetInt().SetStrand(objects::eNa_strand_minus);
1600             }
1601         }
1602     }
1603 }
1604 
1605 
RevComp(objects::CSeq_loc & loc,size_t len)1606 void RevComp (objects::CSeq_loc& loc, size_t len)
1607 {
1608     if (loc.IsInt()) {
1609         TSeqPos new_from = len - loc.GetInt().GetTo() - 1;
1610         TSeqPos new_to = len - loc.GetInt().GetFrom() - 1;
1611         loc.SetInt().SetFrom(new_from);
1612         loc.SetInt().SetTo(new_to);
1613         if (loc.GetInt().IsSetStrand()
1614             && loc.GetInt().GetStrand() == eNa_strand_minus) {
1615             loc.SetInt().SetStrand(eNa_strand_plus);
1616         } else {
1617             loc.SetInt().SetStrand(eNa_strand_minus);
1618         }
1619     } else if (loc.IsMix()) {
1620         NON_CONST_ITERATE (objects::CSeq_loc_mix::Tdata, it, loc.SetMix().Set()) {
1621             RevComp (**it, len);
1622         }
1623     }
1624 }
1625 
1626 
RevComp(CRef<objects::CSeq_entry> entry)1627 void RevComp (CRef<objects::CSeq_entry> entry)
1628 {
1629     if (entry->IsSeq()) {
1630         RevComp(entry->SetSeq());
1631     } else if (entry->IsSet()) {
1632         if (entry->GetSet().IsSetClass()
1633             && entry->GetSet().GetClass() == objects::CBioseq_set::eClass_nuc_prot) {
1634             RevComp(entry->SetSet().SetSeq_set().front());
1635             size_t len = entry->GetSet().GetSeq_set().front()->GetSeq().GetLength();
1636             EDIT_EACH_SEQFEAT_ON_SEQANNOT (feat_it, *(entry->SetSet().SetAnnot().front())) {
1637                 RevComp ((*feat_it)->SetLocation(), len);
1638             }
1639         }
1640     }
1641 }
1642 
1643 
BuildGoodDeltaSeq(void)1644 CRef<objects::CSeq_entry> BuildGoodDeltaSeq(void)
1645 {
1646     CRef<objects::CSeq_entry> entry = BuildGoodSeq();
1647 
1648     entry->SetSeq().SetInst().ResetSeq_data();
1649     entry->SetSeq().SetInst().SetRepr(objects::CSeq_inst::eRepr_delta);
1650     entry->SetSeq().SetInst().SetExt().SetDelta().AddLiteral("ATGATGATGCCC", objects::CSeq_inst::eMol_dna);
1651     CRef<objects::CDelta_seq> gap_seg(new objects::CDelta_seq());
1652     gap_seg->SetLiteral().SetSeq_data().SetGap();
1653     gap_seg->SetLiteral().SetLength(10);
1654     entry->SetSeq().SetInst().SetExt().SetDelta().Set().push_back(gap_seg);
1655     entry->SetSeq().SetInst().SetExt().SetDelta().AddLiteral("CCCATGATGATG", objects::CSeq_inst::eMol_dna);
1656     entry->SetSeq().SetInst().SetLength(34);
1657 
1658     return entry;
1659 }
1660 
1661 
RemoveDeltaSeqGaps(CRef<objects::CSeq_entry> entry)1662 void RemoveDeltaSeqGaps(CRef<objects::CSeq_entry> entry)
1663 {
1664     objects::CDelta_ext::Tdata::iterator seg_it = entry->SetSeq().SetInst().SetExt().SetDelta().Set().begin();
1665     while (seg_it != entry->SetSeq().SetInst().SetExt().SetDelta().Set().end()) {
1666         if ((*seg_it)->IsLiteral()
1667             && (!(*seg_it)->GetLiteral().IsSetSeq_data()
1668                 || (*seg_it)->GetLiteral().GetSeq_data().IsGap())) {
1669             TSeqPos len = entry->SetSeq().SetInst().GetLength();
1670             len -= (*seg_it)->GetLiteral().GetLength();
1671             seg_it = entry->SetSeq().SetInst().SetExt().SetDelta().Set().erase(seg_it);
1672             entry->SetSeq().SetInst().SetLength(len);
1673         } else {
1674             ++seg_it;
1675         }
1676     }
1677 }
1678 
1679 
AddToDeltaSeq(CRef<objects::CSeq_entry> entry,string seq)1680 void AddToDeltaSeq(CRef<objects::CSeq_entry> entry, string seq)
1681 {
1682     size_t orig_len = entry->GetSeq().GetLength();
1683     size_t add_len = seq.length();
1684 
1685     CRef<objects::CDelta_seq> gap_seg(new objects::CDelta_seq());
1686     gap_seg->SetLiteral().SetSeq_data().SetGap();
1687     gap_seg->SetLiteral().SetLength(10);
1688     entry->SetSeq().SetInst().SetExt().SetDelta().Set().push_back(gap_seg);
1689     entry->SetSeq().SetInst().SetExt().SetDelta().AddLiteral(seq, objects::CSeq_inst::eMol_dna);
1690     entry->SetSeq().SetInst().SetLength(orig_len + 10 + add_len);
1691 }
1692 
1693 
BuildSegSetPart(string id_str)1694 CRef<objects::CSeq_entry> BuildSegSetPart(string id_str)
1695 {
1696     CRef<objects::CSeq_entry> part(new objects::CSeq_entry());
1697     part->SetSeq().SetInst().SetMol(objects::CSeq_inst::eMol_dna);
1698     part->SetSeq().SetInst().SetRepr(objects::CSeq_inst::eRepr_raw);
1699     part->SetSeq().SetInst().SetSeq_data().SetIupacna().Set("AATTGGCCAAAATTGGCCAAAATTGGCCAAAATTGGCCAAAATTGGCCAAAATTGGCCAA");
1700     part->SetSeq().SetInst().SetLength(60);
1701     CRef<objects::CSeq_id> id(new objects::CSeq_id(id_str));
1702     part->SetSeq().SetId().push_back(id);
1703     SetBiomol(part, objects::CMolInfo::eBiomol_genomic);
1704     return part;
1705 }
1706 
1707 
BuildGoodSegSet(void)1708 CRef<objects::CSeq_entry> BuildGoodSegSet(void)
1709 {
1710     CRef<objects::CSeq_entry> segset(new objects::CSeq_entry());
1711     segset->SetSet().SetClass(objects::CBioseq_set::eClass_segset);
1712 
1713     CRef<objects::CSeq_entry> seg_seq(new objects::CSeq_entry());
1714     seg_seq->SetSeq().SetInst().SetMol(objects::CSeq_inst::eMol_dna);
1715     seg_seq->SetSeq().SetInst().SetRepr(objects::CSeq_inst::eRepr_seg);
1716 
1717     CRef<objects::CSeq_loc> loc1(new objects::CSeq_loc());
1718     loc1->SetWhole().SetLocal().SetStr("part1");
1719     CRef<objects::CSeq_loc> loc2(new objects::CSeq_loc());
1720     loc2->SetWhole().SetLocal().SetStr("part2");
1721     CRef<objects::CSeq_loc> loc3(new objects::CSeq_loc());
1722     loc3->SetWhole().SetLocal().SetStr("part3");
1723 
1724     seg_seq->SetSeq().SetInst().SetExt().SetSeg().Set().push_back(loc1);
1725     seg_seq->SetSeq().SetInst().SetExt().SetSeg().Set().push_back(loc2);
1726     seg_seq->SetSeq().SetInst().SetExt().SetSeg().Set().push_back(loc3);
1727     seg_seq->SetSeq().SetInst().SetLength(180);
1728 
1729     CRef<objects::CSeq_id> id(new objects::CSeq_id());
1730     id->SetLocal().SetStr ("master");
1731     seg_seq->SetSeq().SetId().push_back(id);
1732     seg_seq->SetSeq().SetInst().SetLength(180);
1733     SetBiomol(seg_seq, objects::CMolInfo::eBiomol_genomic);
1734 
1735     segset->SetSet().SetSeq_set().push_back(seg_seq);
1736 
1737     // create parts set
1738     CRef<objects::CSeq_entry> parts_set(new objects::CSeq_entry());
1739     parts_set->SetSet().SetClass(objects::CBioseq_set::eClass_parts);
1740     parts_set->SetSet().SetSeq_set().push_back(BuildSegSetPart("lcl|part1"));
1741     parts_set->SetSet().SetSeq_set().push_back(BuildSegSetPart("lcl|part2"));
1742     parts_set->SetSet().SetSeq_set().push_back(BuildSegSetPart("lcl|part3"));
1743 
1744     segset->SetSet().SetSeq_set().push_back(parts_set);
1745 
1746 //    CRef<objects::CSeqdesc> pdesc(new objects::CSeqdesc());
1747 //    CRef<objects::CPub> pub(new objects::CPub());
1748 //    pub->SetPmid((objects::CPub::TPmid)1);
1749 //    pdesc->SetPub().SetPub().Set().push_back(pub);
1750 //    segset->SetDescr().Set().push_back(pdesc);
1751     AddGoodPub(segset);
1752     CRef<objects::CSeqdesc> odesc(new objects::CSeqdesc());
1753     odesc->SetSource().SetOrg().SetTaxname("Sebaea microphylla");
1754     odesc->SetSource().SetOrg().SetOrgname().SetLineage("some lineage");
1755     CRef<objects::CDbtag> taxon_id(new objects::CDbtag());
1756     taxon_id->SetDb("taxon");
1757     taxon_id->SetTag().SetId(592768);
1758     odesc->SetSource().SetOrg().SetDb().push_back(taxon_id);
1759     CRef<objects::CSubSource> subsrc(new objects::CSubSource());
1760     subsrc->SetSubtype(objects::CSubSource::eSubtype_chromosome);
1761     subsrc->SetName("1");
1762     odesc->SetSource().SetSubtype().push_back(subsrc);
1763     segset->SetDescr().Set().push_back(odesc);
1764 
1765     return segset;
1766 }
1767 
1768 
BuildGoodEcoSet()1769 CRef<objects::CSeq_entry> BuildGoodEcoSet()
1770 {
1771     CRef<objects::CSeq_entry> entry(new objects::CSeq_entry());
1772     entry->SetSet().SetClass(objects::CBioseq_set::eClass_eco_set);
1773     CRef<objects::CSeq_entry> seq1 = BuildGoodSeq();
1774     ChangeId(seq1, "1");
1775     CRef<objects::CSeq_entry> seq2 = BuildGoodSeq();
1776     ChangeId(seq2, "2");
1777     CRef<objects::CSeq_entry> seq3 = BuildGoodSeq();
1778     ChangeId(seq3, "3");
1779     entry->SetSet().SetSeq_set().push_back(seq1);
1780     entry->SetSet().SetSeq_set().push_back(seq2);
1781     entry->SetSet().SetSeq_set().push_back(seq3);
1782 
1783     CRef<objects::CSeqdesc> desc(new objects::CSeqdesc());
1784     desc->SetTitle("popset title");
1785     entry->SetSet().SetDescr().Set().push_back(desc);
1786 
1787     return entry;
1788 }
1789 
1790 
BuildGoodEcoSetWithAlign(size_t front_insert)1791 CRef<objects::CSeq_entry> BuildGoodEcoSetWithAlign(size_t front_insert)
1792 {
1793     CRef<CSeq_entry> entry = BuildGoodEcoSet();
1794 
1795     CRef<objects::CSeq_align> align(new CSeq_align());
1796     align->SetType(objects::CSeq_align::eType_global);
1797     align->SetDim(entry->GetSet().GetSeq_set().size());
1798     size_t offset = 0;
1799     for (auto& s : entry->SetSet().SetSeq_set()) {
1800         CRef<CSeq_id> id(new CSeq_id());
1801         id->Assign(*(s->GetSeq().GetId().front()));
1802         align->SetSegs().SetDenseg().SetIds().push_back(id);
1803         if (offset > 0) {
1804             const string& orig = s->SetSeq().SetInst().SetSeq_data().SetIupacna().Set();
1805             size_t orig_len = s->GetSeq().GetInst().GetLength();
1806             string add = "";
1807             for (auto i = (size_t)0; i < offset; i++) {
1808                 add += "A";
1809             }
1810             s->SetSeq().SetInst().SetSeq_data().SetIupacna().Set(add + orig);
1811             s->SetSeq().SetInst().SetLength(orig_len + offset);
1812         }
1813         align->SetSegs().SetDenseg().SetStarts().push_back(offset);
1814         offset += front_insert;
1815     }
1816     align->SetSegs().SetDenseg().SetNumseg(1);
1817     align->SetSegs().SetDenseg().SetLens().push_back(entry->GetSet().GetSeq_set().front()->GetSeq().GetInst().GetLength());
1818     align->SetSegs().SetDenseg().SetDim(3);
1819 
1820     CRef<CSeq_annot> annot(new CSeq_annot());
1821     annot->SetData().SetAlign().push_back(align);
1822     entry->SetSet().SetAnnot().push_back(annot);
1823     return entry;
1824 }
1825 
1826 
1827 // assumes that sequence has been reverse-complemented
ReverseAlignmentStrand(CDense_seg & denseg,size_t pos,size_t seq_len)1828 void ReverseAlignmentStrand(CDense_seg& denseg, size_t pos, size_t seq_len)
1829 {
1830     // prepopulate the strand array if not already present
1831     auto num_pieces = denseg.GetDim() * denseg.GetNumseg();
1832     if (!denseg.IsSetStrands()) {
1833         for (auto i = 0; i < num_pieces; i++) {
1834             denseg.SetStrands().push_back(eNa_strand_plus);
1835         }
1836     } else if (denseg.GetStrands().size() < num_pieces) {
1837         for (auto i = denseg.GetStrands().size(); i < num_pieces; i++) {
1838             denseg.SetStrands().push_back(eNa_strand_plus);
1839         }
1840     }
1841     for (auto i = 0; i < denseg.GetNumseg(); i++) {
1842         auto offset = i * denseg.GetDim() + pos;
1843         auto orig = denseg.GetStarts()[offset];
1844         if (orig > -1) {
1845             denseg.SetStarts()[offset] = seq_len - orig - denseg.GetLens()[i];
1846         }
1847         if (denseg.GetStrands()[offset] == eNa_strand_minus) {
1848             denseg.SetStrands()[offset] = eNa_strand_plus;
1849         } else {
1850             denseg.SetStrands()[offset] = eNa_strand_minus;
1851         }
1852     }
1853 }
1854 
1855 
BuildGoodAlign()1856 CRef<objects::CSeq_align> BuildGoodAlign()
1857 {
1858     CRef<objects::CSeq_align> align(new objects::CSeq_align());
1859     CRef<objects::CSeq_id> id1(new objects::CSeq_id());
1860     id1->SetGenbank().SetAccession("FJ375734.2");
1861     id1->SetGenbank().SetVersion(2);
1862     CRef<objects::CSeq_id> id2(new objects::CSeq_id());
1863     id2->SetGenbank().SetAccession("FJ375735.2");
1864     id2->SetGenbank().SetVersion(2);
1865     align->SetDim(2);
1866     align->SetType(objects::CSeq_align::eType_global);
1867     align->SetSegs().SetDenseg().SetIds().push_back(id1);
1868     align->SetSegs().SetDenseg().SetIds().push_back(id2);
1869     align->SetSegs().SetDenseg().SetDim(2);
1870     align->SetSegs().SetDenseg().SetStarts().push_back(0);
1871     align->SetSegs().SetDenseg().SetStarts().push_back(0);
1872     align->SetSegs().SetDenseg().SetNumseg(1);
1873     align->SetSegs().SetDenseg().SetLens().push_back(812);
1874 
1875     return align;
1876 }
1877 
1878 
BuildGoodGraphAnnot(string id)1879 CRef<objects::CSeq_annot> BuildGoodGraphAnnot(string id)
1880 {
1881     CRef<objects::CSeq_graph> graph(new objects::CSeq_graph());
1882     graph->SetLoc().SetInt().SetFrom(0);
1883     graph->SetLoc().SetInt().SetTo(10);
1884     graph->SetLoc().SetInt().SetId().SetLocal().SetStr(id);
1885 
1886     CRef<objects::CSeq_annot> annot(new objects::CSeq_annot());
1887     annot->SetData().SetGraph().push_back(graph);
1888 
1889     return annot;
1890 }
1891 
1892 
// Removes from `entry` every descriptor whose choice equals `desc_choice`.
//
// Iteration and erasure are done with the EDIT_EACH_DESCRIPTOR_ON_SEQENTRY /
// ERASE_DESCRIPTOR_ON_SEQENTRY macro pair, which is defined outside this
// file (presumably an erase-safe loop when the two are used together --
// confirm against the macro definitions before restructuring).
void RemoveDescriptorType (CRef<objects::CSeq_entry> entry, objects::CSeqdesc::E_Choice desc_choice)
{
    EDIT_EACH_DESCRIPTOR_ON_SEQENTRY (dit, *entry) {
        // Which() reports the choice currently selected in the descriptor
        if ((*dit)->Which() == desc_choice) {
            ERASE_DESCRIPTOR_ON_SEQENTRY (dit, *entry);
        }
    }
}
1901 
1902 
BuildtRNA(CRef<objects::CSeq_id> id)1903 CRef<objects::CSeq_feat> BuildtRNA(CRef<objects::CSeq_id> id)
1904 {
1905     CRef<objects::CSeq_feat> feat(new objects::CSeq_feat());
1906     feat->SetLocation().SetInt().SetId().Assign(*id);
1907     feat->SetLocation().SetInt().SetFrom(0);
1908     feat->SetLocation().SetInt().SetTo(10);
1909 
1910     feat->SetData().SetRna().SetType(CRNA_ref::eType_tRNA);
1911     feat->SetData().SetRna().SetExt().SetTRNA().SetAa().SetIupacaa('N');
1912     feat->SetData().SetRna().SetExt().SetTRNA().SetAnticodon().SetInt().SetId().Assign(*id);
1913     feat->SetData().SetRna().SetExt().SetTRNA().SetAnticodon().SetInt().SetFrom(11);
1914     feat->SetData().SetRna().SetExt().SetTRNA().SetAnticodon().SetInt().SetTo(13);
1915 
1916     return feat;
1917 }
1918 
1919 
BuildGoodtRNA(CRef<objects::CSeq_id> id)1920 CRef<objects::CSeq_feat> BuildGoodtRNA(CRef<objects::CSeq_id> id)
1921 {
1922     CRef<objects::CSeq_feat> trna = BuildtRNA(id);
1923     trna->SetData().SetRna().SetExt().SetTRNA().SetAnticodon().SetInt().SetFrom(8);
1924     trna->SetData().SetRna().SetExt().SetTRNA().SetAnticodon().SetInt().SetTo(10);
1925     trna->SetData().SetRna().SetExt().SetTRNA().SetAa().SetIupacaa('F');
1926     return trna;
1927 }
1928 
1929 
MakeMixLoc(CRef<objects::CSeq_id> id)1930 CRef<objects::CSeq_loc> MakeMixLoc (CRef<objects::CSeq_id> id)
1931 {
1932     CRef<objects::CSeq_loc> loc1(new objects::CSeq_loc());
1933     loc1->SetInt().SetFrom(0);
1934     loc1->SetInt().SetTo(15);
1935     loc1->SetInt().SetId().Assign(*id);
1936     CRef<objects::CSeq_loc> loc2(new objects::CSeq_loc());
1937     loc2->SetInt().SetFrom(46);
1938     loc2->SetInt().SetTo(56);
1939     loc2->SetInt().SetId().Assign(*id);
1940     CRef<objects::CSeq_loc> mixloc(new objects::CSeq_loc());
1941     mixloc->SetMix().Set().push_back(loc1);
1942     mixloc->SetMix().Set().push_back(loc2);
1943     return mixloc;
1944 }
1945 
1946 
MakeIntronForMixLoc(CRef<objects::CSeq_id> id)1947 CRef<objects::CSeq_feat> MakeIntronForMixLoc (CRef<objects::CSeq_id> id)
1948 {
1949     CRef<objects::CSeq_feat> intron (new objects::CSeq_feat());
1950     intron->SetData().SetImp().SetKey("intron");
1951     intron->SetLocation().SetInt().SetFrom(16);
1952     intron->SetLocation().SetInt().SetTo(45);
1953     intron->SetLocation().SetInt().SetId().Assign(*id);
1954     return intron;
1955 }
1956 
1957 
SetSpliceForMixLoc(objects::CBioseq & seq)1958 void SetSpliceForMixLoc (objects::CBioseq& seq)
1959 {
1960     seq.SetInst().SetSeq_data().SetIupacna().Set()[16] = 'G';
1961     seq.SetInst().SetSeq_data().SetIupacna().Set()[17] = 'T';
1962     seq.SetInst().SetSeq_data().SetIupacna().Set()[44] = 'A';
1963     seq.SetInst().SetSeq_data().SetIupacna().Set()[45] = 'G';
1964 }
1965 
1966 
MakeGeneForFeature(CRef<objects::CSeq_feat> feat)1967 CRef<objects::CSeq_feat> MakeGeneForFeature (CRef<objects::CSeq_feat> feat)
1968 {
1969     CRef<objects::CSeq_feat> gene(new objects::CSeq_feat());
1970     gene->SetData().SetGene().SetLocus("gene locus");
1971     gene->SetLocation().SetInt().SetId().Assign(*(feat->GetLocation().GetId()));
1972     gene->SetLocation().SetInt().SetStrand(feat->GetLocation().GetStrand());
1973     gene->SetLocation().SetInt().SetFrom(feat->GetLocation().GetStart(objects::eExtreme_Positional));
1974     gene->SetLocation().SetInt().SetTo(feat->GetLocation().GetStop(objects::eExtreme_Positional));
1975     gene->SetLocation().SetPartialStart(feat->GetLocation().IsPartialStart(objects::eExtreme_Positional), objects::eExtreme_Positional);
1976     gene->SetLocation().SetPartialStop(feat->GetLocation().IsPartialStop(objects::eExtreme_Positional), objects::eExtreme_Positional);
1977     if (feat->IsSetPartial() && feat->GetPartial()) {
1978         gene->SetPartial(true);
1979     }
1980     return gene;
1981 }
1982 
1983 
AddGoodImpFeat(CRef<objects::CSeq_entry> entry,string key)1984 CRef<objects::CSeq_feat> AddGoodImpFeat (CRef<objects::CSeq_entry> entry, string key)
1985 {
1986     CRef<objects::CSeq_feat> imp_feat = AddMiscFeature (entry, 10);
1987     imp_feat->SetData().SetImp().SetKey(key);
1988     if (NStr::Equal(key, "conflict")) {
1989         imp_feat->AddQualifier("citation", "1");
1990     } else if (NStr::Equal(key, "intron")) {
1991         entry->SetSeq().SetInst().SetSeq_data().SetIupacna().Set()[0] = 'G';
1992         entry->SetSeq().SetInst().SetSeq_data().SetIupacna().Set()[1] = 'T';
1993         entry->SetSeq().SetInst().SetSeq_data().SetIupacna().Set()[9] = 'A';
1994         entry->SetSeq().SetInst().SetSeq_data().SetIupacna().Set()[10] = 'G';
1995     } else if (NStr::Equal(key, "misc_binding") || NStr::Equal(key, "protein_bind")) {
1996         imp_feat->AddQualifier("bound_moiety", "foo");
1997     } else if (NStr::Equal(key, "modified_base")) {
1998         imp_feat->AddQualifier("mod_base", "foo");
1999     } else if (NStr::Equal(key, "old_sequence")) {
2000         imp_feat->AddQualifier("citation", "1");
2001     } else if (NStr::Equal(key, "operon")) {
2002         imp_feat->AddQualifier("operon", "foo");
2003     } else if (NStr::Equal(key, "polyA_site")) {
2004         imp_feat->SetLocation().SetPnt().SetId().SetLocal().SetStr("good");
2005         imp_feat->SetLocation().SetPnt().SetPoint(5);
2006     } else if (NStr::Equal(key, "source")) {
2007         imp_feat->AddQualifier("organism", "foo");
2008     }
2009     return imp_feat;
2010 }
2011 
2012 // helper classes for TraverseAndRunTestCases
2013 namespace {
2014 
2015     // This just accumulates all the files in the path
2016     struct SFileRememberer
2017     {
operator ()__anon69a043f40111::SFileRememberer2018         void operator()( const CDirEntry & dir_entry ) {
2019             m_filesFound.push_back(CFile(dir_entry));
2020         }
2021 
2022         vector<CFile> m_filesFound;
2023     };
2024 
2025     // a simple function object that extracts the
2026     // first of a pair. (Unfortunately, "select1st" is not part of
2027     // the STL standard so it can't be relied upon to exist)
2028     template<typename Pair>
2029     struct SFirstOfPair
2030     {
operator ()__anon69a043f40111::SFirstOfPair2031         typename Pair::first_type operator()( const Pair & a_pair ) const
2032         {
2033             return a_pair.first;
2034         }
2035     };
2036 }
2037 
// Collects the files under `dirWithTestCases`, groups them into test cases by
// file-name prefix, validates each group's suffixes, and runs every valid
// test through `pTestRunner`.
//
// A file name is split at its first "." into <test name> and <suffix>; all
// files sharing a test name form one test case (suffix -> file).
//
//   pTestRunner            receives OnError() for every problem found and
//                          RunTest() for every valid test; must not be NULL
//                          (a NULL runner raises via NCBI_USER_THROW_FMT).
//   dirWithTestCases       top-level directory to search.
//   setOfRequiredSuffixes  every test must have a file for each of these.
//   setOfOptionalSuffixes  suffixes a test may additionally have.
//   setOfIgnoredSuffixes   files with these suffixes are skipped silently.
//   fFlags                 fTraverseAndRunTestCasesFlags_DoNOTIgnoreREADMEFiles
//                          turns off the default skipping of README* files.
//
// Apart from the NULL-runner case, problems are reported through OnError()
// and the function carries on (or returns early); it does not throw for them
// itself.
void TraverseAndRunTestCases(
    ITestRunner *pTestRunner,
    CDir dirWithTestCases,
    const set<string> & setOfRequiredSuffixes,
    const set<string> & setOfOptionalSuffixes,
    const set<string> & setOfIgnoredSuffixes,
    TTraverseAndRunTestCasesFlags fFlags )
{
    // --- argument checks ---
    if( ! pTestRunner ) {
        NCBI_USER_THROW_FMT("NULL pTestRunner");
    }
    if( ! dirWithTestCases.Exists() ) {
        pTestRunner->OnError("Top-level test-cases dir not found: " + dirWithTestCases.GetPath() );
        return;
    }
    if( ! dirWithTestCases.IsDir() ) {
        pTestRunner->OnError("Top-level test-cases dir is actually not a dir: " + dirWithTestCases.GetPath() );
        return;
    }

    // empty name/mask lists passed to FindFilesInDir
    // (presumably meaning "no filtering" -- confirm against its documentation)
    const vector<string> kEmptyVectorOfStrings;

    // --- gather candidate files into fileRememberer.m_filesFound ---
    SFileRememberer fileRememberer;
    FindFilesInDir(
        dirWithTestCases,
        kEmptyVectorOfStrings,
        kEmptyVectorOfStrings,
        fileRememberer,
        fFF_File | fFF_Recursive );

    // this is what we search for to see if a file lives somewhere under a
    // ".svn" directory (the only kind of hidden path component checked for).
    const string kHiddenSubstring = CDirEntry::GetPathSeparator() + string(".svn") + CDirEntry::GetPathSeparator();

    // test name -> (suffix -> file)
    typedef map<string, ITestRunner::TMapSuffixToFile> TMapTestNameToItsFiles;
    TMapTestNameToItsFiles mapTestNameToItsFiles;
    // this loop loads mapTestNameToItsFiles
    ITERATE( vector<CFile>, file_it, fileRememberer.m_filesFound ) {
        const string sFileName = file_it->GetName();
        const string sFileAbsPath = CDirEntry::CreateAbsolutePath(file_it->GetPath());

        // anything under a ".svn" directory is silently ignored
        if( NStr::Find(sFileAbsPath, kHiddenSubstring) != NPOS ) {
            continue;
        }

        if( ! (fFlags & fTraverseAndRunTestCasesFlags_DoNOTIgnoreREADMEFiles) &&
            NStr::StartsWith(sFileName, "README") )
        {
            // unless the flag says otherwise, silently ignore files
            // starting with README
            continue;
        }

        // extract out testname and suffix (split at the first ".")
        string sTestName;
        string sSuffix;
        NStr::SplitInTwo(sFileName, ".", sTestName, sSuffix);
        if( sTestName.empty() || sSuffix.empty() ) {
            // no ".", or nothing before/after it
            pTestRunner->OnError("Bad file name: " + file_it->GetPath());
            continue;
        }

        if( setOfIgnoredSuffixes.find(sSuffix) != setOfIgnoredSuffixes.end() ) {
            // silently ignores suffixes requested to be ignored by the user
            continue;
        }

        // load this entry, with error if not inserted
        // (insert() reports false when this test already has that suffix,
        // i.e. the same file name was found in more than one directory)
        const bool bWasInserted =
            mapTestNameToItsFiles[sTestName].insert(make_pair(sSuffix, *file_it)).second;
        if( ! bWasInserted ) {
            pTestRunner->OnError(
                "File with same name appears multiple times in different dirs: " +
                file_it->GetPath() );
            continue;
        }
    }

    // sanity check all tests and remove the unusable ones
    // NOTE(review): the loop erases the current element; this relies on
    // ERASE_ITERATE being the erase-safe iteration macro -- confirm before
    // replacing it with a plain loop.
    ERASE_ITERATE(TMapTestNameToItsFiles, test_it, mapTestNameToItsFiles) {
        const string & sTestName = test_it->first;
        const ITestRunner::TMapSuffixToFile & mapSuffixToFile =
            test_it->second;

        // get the keys (that is, the suffixes) of the map
        set<string> setOfAllSuffixes;
        transform(mapSuffixToFile.begin(), mapSuffixToFile.end(),
            inserter(setOfAllSuffixes, setOfAllSuffixes.begin()),
            SFirstOfPair<ITestRunner::TMapSuffixToFile::value_type>() );

        // get the non-required suffixes that were used
        set<string> setOfNonRequiredSuffixes;
        set_difference( setOfAllSuffixes.begin(), setOfAllSuffixes.end(),
            setOfRequiredSuffixes.begin(), setOfRequiredSuffixes.end(),
            inserter(setOfNonRequiredSuffixes, setOfNonRequiredSuffixes.begin() ) );

        // make sure it has all required suffixes
        // (the set of suffixes should have shrunk by exactly the number of required
        // suffixes on the set_difference just above)
        const size_t szNumOfSuffixes = setOfAllSuffixes.size();
        const size_t szNumOfNonRequiredSuffixes = setOfNonRequiredSuffixes.size();
        if( (szNumOfSuffixes - szNumOfNonRequiredSuffixes) != setOfRequiredSuffixes.size() )
        {
            // sTestName refers into the map element, so it is used for the
            // message before the element is erased
            pTestRunner->OnError("Skipping this test because it's missing some files: " + sTestName);
            mapTestNameToItsFiles.erase(test_it);
            continue;
        }

        // all non-required suffixes should be in the optional set
        if( ! includes( setOfOptionalSuffixes.begin(), setOfOptionalSuffixes.end(),
            setOfNonRequiredSuffixes.begin(), setOfNonRequiredSuffixes.end() ) )
        {
            pTestRunner->OnError("Skipping this test because it has unexpected suffix(es): " + sTestName);
            mapTestNameToItsFiles.erase(test_it);
            continue;
        }
    }

    // there should be at least one test to run
    if( mapTestNameToItsFiles.empty() ) {
        pTestRunner->OnError("There are no tests to run");
        return;
    }

    // Now, actually run the tests (the map iterates in test-name order)
    ITERATE(TMapTestNameToItsFiles, test_it, mapTestNameToItsFiles) {
        const string & sTestName = test_it->first;
        const ITestRunner::TMapSuffixToFile & mapSuffixToFile =
            test_it->second;

        cerr << "Running test: " << sTestName << endl;
        pTestRunner->RunTest(sTestName, mapSuffixToFile);
    }
}
2172 
2173 END_SCOPE(unit_test_util)
2174 END_SCOPE(objects)
2175 END_NCBI_SCOPE
2176