1 /* $Id: unit_test_util.cpp 607137 2020-04-30 12:57:57Z grichenk $
2 * ===========================================================================
3 *
4 * PUBLIC DOMAIN NOTICE
5 * National Center for Biotechnology Information
6 *
7 * This software/database is a "United States Government Work" under the
8 * terms of the United States Copyright Act. It was written as part of
9 * the author's official duties as a United States Government employee and
10 * thus cannot be copyrighted. This software/database is freely available
11 * to the public for use. The National Library of Medicine and the U.S.
12 * Government have not placed any restriction on its use or reproduction.
13 *
14 * Although all reasonable efforts have been taken to ensure the accuracy
15 * and reliability of the software and data, the NLM and the U.S.
16 * Government do not and cannot warrant the performance or results that
17 * may be obtained by using this software or data. The NLM and the U.S.
18 * Government disclaim all warranties, express or implied, including
19 * warranties of performance, merchantability or fitness for any particular
20 * purpose.
21 *
22 * Please cite the author in any work or product based on this material.
23 *
24 * ===========================================================================
25 *
26 * Author: Jonathan Kans, Clifford Clausen, Aaron Ucko.......
27 *
28 * File Description:
29 * Validates objects::CSeq_entries and objects::CSeq_submits
30 *
31 */
32 #include <ncbi_pch.hpp>
33 #include <corelib/ncbistd.hpp>
34 #include <objects/general/Object_id.hpp>
35 #include <objects/general/Dbtag.hpp>
36 #include <objects/seqfeat/BioSource.hpp>
37 #include <objects/seqfeat/Org_ref.hpp>
38 #include <objects/seqfeat/Imp_feat.hpp>
39 #include <objects/seqfeat/Cdregion.hpp>
40 #include <objects/seq/Seq_descr.hpp>
41 #include <objects/biblio/Id_pat.hpp>
42 #include <objects/biblio/Title.hpp>
43 #include <objects/general/Dbtag.hpp>
44 #include <objects/general/User_object.hpp>
45 #include <objects/medline/Medline_entry.hpp>
46 #include <objects/misc/sequence_macros.hpp>
47 #include <objects/pub/Pub_equiv.hpp>
48 #include <objects/pub/Pub.hpp>
49 #include <objects/seq/GIBB_mol.hpp>
50 #include <objects/seq/Seq_ext.hpp>
51 #include <objects/seq/Delta_ext.hpp>
52 #include <objects/seq/Delta_seq.hpp>
53 #include <objects/seq/Seq_literal.hpp>
54 #include <objects/seq/Ref_ext.hpp>
55 #include <objects/seq/Map_ext.hpp>
56 #include <objects/seq/Seg_ext.hpp>
57 #include <objects/seq/Seq_gap.hpp>
58 #include <objects/seq/Seq_data.hpp>
59 #include <objects/seq/Seq_descr.hpp>
60 #include <objects/seq/Seqdesc.hpp>
61 #include <objects/seq/MolInfo.hpp>
62 #include <objects/seq/Pubdesc.hpp>
63 #include <objects/seq/Seq_hist.hpp>
64 #include <objects/seq/Seq_hist_rec.hpp>
65 #include <objects/seqalign/Dense_seg.hpp>
66 #include <objects/seqblock/GB_block.hpp>
67 #include <objects/seqblock/EMBL_block.hpp>
68 #include <objects/seqfeat/BioSource.hpp>
69 #include <objects/seqfeat/Org_ref.hpp>
70 #include <objects/seqfeat/OrgName.hpp>
71 #include <objects/seqfeat/SubSource.hpp>
72 #include <objects/seqfeat/Imp_feat.hpp>
73 #include <objects/seqfeat/Cdregion.hpp>
74 #include <objects/seqloc/Seq_id.hpp>
75 #include <objects/seqloc/PDB_seq_id.hpp>
76 #include <objects/seqloc/Giimport_id.hpp>
77 #include <objects/seqloc/Patent_seq_id.hpp>
78 #include <objects/seqloc/Seq_loc.hpp>
79 #include <objects/seqloc/Seq_interval.hpp>
80 #include <objmgr/util/sequence.hpp>
81 #include <objects/misc/sequence_macros.hpp>
82
83 #include <objtools/unit_test_util/unit_test_util.hpp>
84
85 #include <functional>
86
87 BEGIN_NCBI_SCOPE
BEGIN_SCOPE(objects)88 BEGIN_SCOPE(objects)
89 BEGIN_SCOPE(unit_test_util)
90
91
92 void SetDbxref (objects::CBioSource& src, string db, objects::CObject_id::TId id)
93 {
94 CRef<objects::CDbtag> dbtag(new objects::CDbtag());
95 dbtag->SetDb(db);
96 dbtag->SetTag().SetId(id);
97 src.SetOrg().SetDb().push_back(dbtag);
98 }
99
100
RemoveDbxref(objects::CBioSource & src,string db,objects::CObject_id::TId id)101 void RemoveDbxref (objects::CBioSource& src, string db, objects::CObject_id::TId id)
102 {
103 if (src.IsSetOrg() && src.GetOrg().IsSetDb()) {
104 objects::COrg_ref::TDb::iterator it = src.SetOrg().SetDb().begin();
105 while (it != src.SetOrg().SetDb().end()) {
106 if ((NStr::IsBlank(db) || ((*it)->IsSetDb() && NStr::Equal((*it)->GetDb(), db)))
107 && (id == 0 || ((*it)->IsSetTag() && (*it)->GetTag().IsId() && (*it)->GetTag().GetId() == id))) {
108 it = src.SetOrg().SetDb().erase(it);
109 } else {
110 ++it;
111 }
112 }
113 }
114 }
115
116
SetTaxon(objects::CBioSource & src,size_t taxon)117 void SetTaxon (objects::CBioSource& src, size_t taxon)
118 {
119 if (taxon == 0) {
120 RemoveDbxref (src, "taxon", 0);
121 } else {
122 SetDbxref(src, "taxon", taxon);
123 }
124 }
125
126
BuildGoodSeq(void)127 CRef<objects::CSeq_entry> BuildGoodSeq(void)
128 {
129 CRef<objects::CSeq_entry> entry(new objects::CSeq_entry());
130 entry->SetSeq().SetInst().SetMol(objects::CSeq_inst::eMol_dna);
131 entry->SetSeq().SetInst().SetRepr(objects::CSeq_inst::eRepr_raw);
132 entry->SetSeq().SetInst().SetSeq_data().SetIupacna().Set("AATTGGCCAAAATTGGCCAAAATTGGCCAAAATTGGCCAAAATTGGCCAAAATTGGCCAA");
133 entry->SetSeq().SetInst().SetLength(60);
134
135 CRef<objects::CSeq_id> id(new objects::CSeq_id());
136 id->SetLocal().SetStr ("good");
137 entry->SetSeq().SetId().push_back(id);
138
139 CRef<objects::CSeqdesc> mdesc(new objects::CSeqdesc());
140 mdesc->SetMolinfo().SetBiomol(objects::CMolInfo::eBiomol_genomic);
141 entry->SetSeq().SetDescr().Set().push_back(mdesc);
142
143 AddGoodSource (entry);
144 AddGoodPub(entry);
145
146 return entry;
147 }
148
149
BuildGoodPubSeqdesc()150 CRef<objects::CSeqdesc> BuildGoodPubSeqdesc()
151 {
152 CRef<objects::CSeqdesc> pdesc(new objects::CSeqdesc());
153 CRef<objects::CPub> pub(new objects::CPub());
154 pub->SetPmid(CPub::TPmid(ENTREZ_ID_CONST(1)));
155 pdesc->SetPub().SetPub().Set().push_back(pub);
156
157 return pdesc;
158 }
159
160
AddGoodPub(CRef<objects::CSeq_entry> entry)161 void AddGoodPub (CRef<objects::CSeq_entry> entry)
162 {
163 CRef<objects::CSeqdesc> pdesc = BuildGoodPubSeqdesc();
164
165 if (entry->IsSeq()) {
166 entry->SetSeq().SetDescr().Set().push_back(pdesc);
167 } else if (entry->IsSet()) {
168 entry->SetSet().SetDescr().Set().push_back(pdesc);
169 }
170
171 CRef<objects::CSeqdesc> pdesc2 = BuildGoodPubSeqdesc();
172 pdesc2->SetPub().SetPub().Set().front()->Assign(*BuildGoodCitSubPub());
173 if (entry->IsSeq()) {
174 entry->SetSeq().SetDescr().Set().push_back(pdesc2);
175 } else if (entry->IsSet()) {
176 entry->SetSet().SetDescr().Set().push_back(pdesc2);
177 }
178
179 }
180
181
AddGoodSource(CRef<objects::CSeq_entry> entry)182 void AddGoodSource (CRef<objects::CSeq_entry> entry)
183 {
184 CRef<objects::CSeqdesc> odesc(new objects::CSeqdesc());
185 odesc->SetSource().SetOrg().SetTaxname("Sebaea microphylla");
186 odesc->SetSource().SetOrg().SetOrgname().SetLineage("some lineage");
187 SetTaxon(odesc->SetSource(), 592768);
188 CRef<objects::CSubSource> subsrc(new objects::CSubSource());
189 subsrc->SetSubtype(objects::CSubSource::eSubtype_chromosome);
190 subsrc->SetName("1");
191 odesc->SetSource().SetSubtype().push_back(subsrc);
192
193 if (entry->IsSeq()) {
194 entry->SetSeq().SetDescr().Set().push_back(odesc);
195 } else if (entry->IsSet()) {
196 entry->SetSet().SetDescr().Set().push_back(odesc);
197 }
198 }
199
200
SetDbxref(objects::CBioSource & src,string db,string id)201 void SetDbxref (objects::CBioSource& src, string db, string id)
202 {
203 CRef<objects::CDbtag> dbtag(new objects::CDbtag());
204 dbtag->SetDb(db);
205 dbtag->SetTag().SetStr(id);
206 src.SetOrg().SetDb().push_back(dbtag);
207 }
208
209
SetDbxref(CRef<objects::CSeq_entry> entry,string db,objects::CObject_id::TId id)210 void SetDbxref (CRef<objects::CSeq_entry> entry, string db, objects::CObject_id::TId id)
211 {
212 if (!entry) {
213 return;
214 }
215 if (entry->IsSeq()) {
216 NON_CONST_ITERATE (objects::CSeq_descr::Tdata, it, entry->SetSeq().SetDescr().Set()) {
217 if ((*it)->IsSource()) {
218 SetDbxref((*it)->SetSource(), db, id);
219 }
220 }
221 } else if (entry->IsSet()) {
222 NON_CONST_ITERATE (objects::CSeq_descr::Tdata, it, entry->SetSet().SetDescr().Set()) {
223 if ((*it)->IsSource()) {
224 SetDbxref((*it)->SetSource(), db, id);
225 }
226 }
227 }
228 }
229
230
SetDbxref(CRef<objects::CSeq_entry> entry,string db,string id)231 void SetDbxref (CRef<objects::CSeq_entry> entry, string db, string id)
232 {
233 if (!entry) {
234 return;
235 }
236 if (entry->IsSeq()) {
237 NON_CONST_ITERATE (objects::CSeq_descr::Tdata, it, entry->SetSeq().SetDescr().Set()) {
238 if ((*it)->IsSource()) {
239 SetDbxref((*it)->SetSource(), db, id);
240 }
241 }
242 } else if (entry->IsSet()) {
243 NON_CONST_ITERATE (objects::CSeq_descr::Tdata, it, entry->SetSet().SetDescr().Set()) {
244 if ((*it)->IsSource()) {
245 SetDbxref((*it)->SetSource(), db, id);
246 }
247 }
248 }
249 }
250
251
SetDbxref(CRef<objects::CSeq_feat> feat,string db,objects::CObject_id::TId id)252 void SetDbxref (CRef<objects::CSeq_feat> feat, string db, objects::CObject_id::TId id)
253 {
254 if (!feat) {
255 return;
256 }
257 CRef<objects::CDbtag> dbtag(new objects::CDbtag());
258 dbtag->SetDb(db);
259 dbtag->SetTag().SetId(id);
260 feat->SetDbxref().push_back(dbtag);
261 }
262
263
SetDbxref(CRef<objects::CSeq_feat> feat,string db,string id)264 void SetDbxref (CRef<objects::CSeq_feat> feat, string db, string id)
265 {
266 if (!feat) {
267 return;
268 }
269 CRef<objects::CDbtag> dbtag(new objects::CDbtag());
270 dbtag->SetDb(db);
271 dbtag->SetTag().SetStr(id);
272 feat->SetDbxref().push_back(dbtag);
273 }
274
275
276
RemoveDbxref(CRef<objects::CSeq_entry> entry,string db,objects::CObject_id::TId id)277 void RemoveDbxref (CRef<objects::CSeq_entry> entry, string db, objects::CObject_id::TId id)
278 {
279 if (!entry) {
280 return;
281 }
282 if (entry->IsSeq()) {
283 NON_CONST_ITERATE (objects::CSeq_descr::Tdata, it, entry->SetSeq().SetDescr().Set()) {
284 if ((*it)->IsSource()) {
285 RemoveDbxref((*it)->SetSource(), db, id);
286 }
287 }
288 } else if (entry->IsSet()) {
289 NON_CONST_ITERATE (objects::CSeq_descr::Tdata, it, entry->SetSet().SetDescr().Set()) {
290 if ((*it)->IsSource()) {
291 RemoveDbxref((*it)->SetSource(), db, id);
292 }
293 }
294 }
295 }
296
297
RemoveDbxref(CRef<objects::CSeq_feat> feat,string db,objects::CObject_id::TId id)298 void RemoveDbxref (CRef<objects::CSeq_feat> feat, string db, objects::CObject_id::TId id)
299 {
300 if (!feat) {
301 return;
302 }
303 if (feat->IsSetDbxref()) {
304 objects::CSeq_feat::TDbxref::iterator it = feat->SetDbxref().begin();
305 while (it != feat->SetDbxref().end()) {
306 if ((NStr::IsBlank(db) || ((*it)->IsSetDb() && NStr::Equal((*it)->GetDb(), db)))
307 && (id == 0 || ((*it)->IsSetTag() && (*it)->GetTag().IsId() && (*it)->GetTag().GetId() == id))) {
308 it = feat->SetDbxref().erase(it);
309 } else {
310 ++it;
311 }
312 }
313 }
314 }
315
316
SetTaxon(CRef<objects::CSeq_entry> entry,size_t taxon)317 void SetTaxon (CRef<objects::CSeq_entry> entry, size_t taxon)
318 {
319 if (!entry) {
320 return;
321 }
322 if (entry->IsSeq()) {
323 NON_CONST_ITERATE (objects::CSeq_descr::Tdata, it, entry->SetSeq().SetDescr().Set()) {
324 if ((*it)->IsSource()) {
325 SetTaxon((*it)->SetSource(), taxon);
326 }
327 }
328 } else if (entry->IsSet()) {
329 NON_CONST_ITERATE (objects::CSeq_descr::Tdata, it, entry->SetSet().SetDescr().Set()) {
330 if ((*it)->IsSource()) {
331 SetTaxon((*it)->SetSource(), taxon);
332 }
333 }
334 }
335 }
336
337
AddFeatAnnotToSeqEntry(CRef<objects::CSeq_annot> annot,CRef<objects::CSeq_entry> entry)338 void AddFeatAnnotToSeqEntry (CRef<objects::CSeq_annot> annot, CRef<objects::CSeq_entry> entry)
339 {
340 if (!entry || !annot) {
341 return;
342 }
343 if (entry->IsSeq()) {
344 entry->SetSeq().SetAnnot().push_back(annot);
345 } else if (entry->IsSet()) {
346 if (entry->GetSet().IsSetSeq_set()) {
347 AddFeatAnnotToSeqEntry (annot, entry->SetSet().SetSeq_set().front());
348 }
349 }
350 }
351
352
AddFeat(CRef<objects::CSeq_feat> feat,CRef<objects::CSeq_entry> entry)353 CRef<objects::CSeq_annot> AddFeat (CRef<objects::CSeq_feat> feat, CRef<objects::CSeq_entry> entry)
354 {
355 CRef<objects::CSeq_annot> annot;
356
357 if (entry->IsSeq()) {
358 if (!entry->GetSeq().IsSetAnnot()
359 || !entry->GetSeq().GetAnnot().front()->IsFtable()) {
360 CRef<objects::CSeq_annot> new_annot(new objects::CSeq_annot());
361 entry->SetSeq().SetAnnot().push_back(new_annot);
362 annot = new_annot;
363 } else {
364 annot = entry->SetSeq().SetAnnot().front();
365 }
366 } else if (entry->IsSet()) {
367 if (!entry->GetSet().IsSetAnnot()
368 || !entry->GetSet().GetAnnot().front()->IsFtable()) {
369 CRef<objects::CSeq_annot> new_annot(new objects::CSeq_annot());
370 entry->SetSet().SetAnnot().push_back(new_annot);
371 annot = new_annot;
372 } else {
373 annot = entry->SetSet().SetAnnot().front();
374 }
375 }
376 annot->SetData().SetFtable().push_back(feat);
377 return annot;
378 }
379
AddProtFeat(CRef<objects::CSeq_entry> entry)380 CRef<objects::CSeq_feat> AddProtFeat(CRef<objects::CSeq_entry> entry)
381 {
382 CRef<objects::CSeq_feat> feat (new objects::CSeq_feat());
383 feat->SetData().SetProt().SetName().push_back("fake protein name");
384 feat->SetLocation().SetInt().SetId().Assign(*(entry->GetSeq().GetId().front()));
385 feat->SetLocation().SetInt().SetFrom(0);
386 feat->SetLocation().SetInt().SetTo(entry->GetSeq().GetInst().GetLength() - 1);
387 AddFeat (feat, entry);
388 return feat;
389 }
390
391
AddGoodSourceFeature(CRef<objects::CSeq_entry> entry)392 CRef<objects::CSeq_feat> AddGoodSourceFeature(CRef<objects::CSeq_entry> entry)
393 {
394 CRef<objects::CSeq_feat> feat(new objects::CSeq_feat());
395 feat->SetData().SetBiosrc().SetOrg().SetTaxname("Trichechus manatus");
396 SetTaxon (feat->SetData().SetBiosrc(), 9778);
397 feat->SetData().SetBiosrc().SetOrg().SetOrgname().SetLineage("some lineage");
398 feat->SetLocation().SetInt().SetId().SetLocal().SetStr("good");
399 feat->SetLocation().SetInt().SetFrom(0);
400 feat->SetLocation().SetInt().SetTo(5);
401 CRef<objects::CSeq_annot> annot(new objects::CSeq_annot());
402 annot->SetData().SetFtable().push_back(feat);
403 AddFeatAnnotToSeqEntry (annot, entry);
404 return feat;
405 }
406
407
MakeMiscFeature(CRef<objects::CSeq_id> id,size_t right_end,size_t left_end)408 CRef<objects::CSeq_feat> MakeMiscFeature(CRef<objects::CSeq_id> id, size_t right_end, size_t left_end)
409 {
410 CRef<objects::CSeq_feat> feat(new objects::CSeq_feat());
411 feat->SetLocation().SetInt().SetId().Assign(*id);
412 feat->SetLocation().SetInt().SetFrom(left_end);
413 feat->SetLocation().SetInt().SetTo(right_end);
414 feat->SetData().SetImp().SetKey("misc_feature");
415 return feat;
416 }
417
418
BuildGoodFeat()419 CRef<CSeq_feat> BuildGoodFeat ()
420 {
421 CRef<CSeq_feat> feat(new CSeq_feat());
422 feat->SetLocation().SetInt().SetId().SetLocal().SetStr("good");
423 feat->SetLocation().SetInt().SetFrom(0);
424 feat->SetLocation().SetInt().SetTo(59);
425 feat->SetData().SetImp().SetKey("misc_feature");
426
427 return feat;
428 }
429
430
IdFromEntry(CRef<objects::CSeq_entry> entry)431 CRef<objects::CSeq_id> IdFromEntry(CRef<objects::CSeq_entry> entry)
432 {
433 if (entry->IsSeq()) {
434 return entry->SetSeq().SetId().front();
435 } else if (entry->IsSet()) {
436 return IdFromEntry (entry->SetSet().SetSeq_set().front());
437 } else {
438 CRef<objects::CSeq_id> empty;
439 return empty;
440 }
441 }
442
443
AddMiscFeature(CRef<objects::CSeq_entry> entry,size_t right_end)444 CRef<objects::CSeq_feat> AddMiscFeature(CRef<objects::CSeq_entry> entry, size_t right_end)
445 {
446 CRef<objects::CSeq_feat> feat = MakeMiscFeature(IdFromEntry(entry), right_end);
447 feat->SetComment("misc_feature needs a comment");
448 AddFeat (feat, entry);
449 return feat;
450 }
451
452
AddMiscFeature(CRef<objects::CSeq_entry> entry)453 CRef<objects::CSeq_feat> AddMiscFeature(CRef<objects::CSeq_entry> entry)
454 {
455 return AddMiscFeature (entry, 10);
456 }
457
458
SetTaxname(CRef<objects::CSeq_entry> entry,string taxname)459 void SetTaxname (CRef<objects::CSeq_entry> entry, string taxname)
460 {
461 if (!entry) {
462 return;
463 }
464 if (entry->IsSeq()) {
465 NON_CONST_ITERATE (objects::CSeq_descr::Tdata, it, entry->SetSeq().SetDescr().Set()) {
466 if ((*it)->IsSource()) {
467 if (NStr::IsBlank(taxname)) {
468 (*it)->SetSource().SetOrg().ResetTaxname();
469 } else {
470 (*it)->SetSource().SetOrg().SetTaxname(taxname);
471 }
472 }
473 }
474 } else if (entry->IsSet()) {
475 NON_CONST_ITERATE (objects::CSeq_descr::Tdata, it, entry->SetSet().SetDescr().Set()) {
476 if ((*it)->IsSource()) {
477 if (NStr::IsBlank(taxname)) {
478 (*it)->SetSource().SetOrg().ResetTaxname();
479 } else {
480 (*it)->SetSource().SetOrg().SetTaxname(taxname);
481 }
482 }
483 }
484 }
485 }
486
487
SetSebaea_microphylla(CRef<objects::CSeq_entry> entry)488 void SetSebaea_microphylla(CRef<objects::CSeq_entry> entry)
489 {
490 SetTaxname(entry, "Sebaea microphylla");
491 SetTaxon(entry, 0);
492 SetTaxon(entry, 592768);
493 }
494
495
SetSynthetic_construct(CRef<objects::CSeq_entry> entry)496 void SetSynthetic_construct(CRef<objects::CSeq_entry> entry)
497 {
498 SetTaxname(entry, "synthetic construct");
499 SetTaxon(entry, 0);
500 SetTaxon(entry, 32630);
501 }
502
503
SetDrosophila_melanogaster(CRef<objects::CSeq_entry> entry)504 void SetDrosophila_melanogaster(CRef<objects::CSeq_entry> entry)
505 {
506 SetTaxname(entry, "Drosophila melanogaster");
507 SetTaxon(entry, 0);
508 SetTaxon(entry, 7227);
509 }
510
SetCommon(CRef<objects::CSeq_entry> entry,string common)511 void SetCommon (CRef<objects::CSeq_entry> entry, string common)
512 {
513 if (!entry) {
514 return;
515 }
516 if (entry->IsSeq()) {
517 NON_CONST_ITERATE (objects::CSeq_descr::Tdata, it, entry->SetSeq().SetDescr().Set()) {
518 if ((*it)->IsSource()) {
519 if (NStr::IsBlank(common)) {
520 (*it)->SetSource().SetOrg().ResetCommon();
521 } else {
522 (*it)->SetSource().SetOrg().SetCommon(common);
523 }
524 }
525 }
526 } else if (entry->IsSet()) {
527 NON_CONST_ITERATE (objects::CSeq_descr::Tdata, it, entry->SetSet().SetDescr().Set()) {
528 if ((*it)->IsSource()) {
529 if (NStr::IsBlank(common)) {
530 (*it)->SetSource().SetOrg().ResetCommon();
531 } else {
532 (*it)->SetSource().SetOrg().SetCommon(common);
533 }
534 }
535 }
536 }
537 }
538
539
SetLineage(CRef<objects::CSeq_entry> entry,string lineage)540 void SetLineage (CRef<objects::CSeq_entry> entry, string lineage)
541 {
542 if (!entry) {
543 return;
544 }
545 if (entry->IsSeq()) {
546 NON_CONST_ITERATE (objects::CSeq_descr::Tdata, it, entry->SetSeq().SetDescr().Set()) {
547 if ((*it)->IsSource()) {
548 if (NStr::IsBlank(lineage)) {
549 (*it)->SetSource().SetOrg().SetOrgname().ResetLineage();
550 } else {
551 (*it)->SetSource().SetOrg().SetOrgname().SetLineage(lineage);
552 }
553 }
554 }
555 } else if (entry->IsSet()) {
556 NON_CONST_ITERATE (objects::CSeq_descr::Tdata, it, entry->SetSet().SetDescr().Set()) {
557 if ((*it)->IsSource()) {
558 if (NStr::IsBlank(lineage)) {
559 (*it)->SetSource().SetOrg().SetOrgname().ResetLineage();
560 } else {
561 (*it)->SetSource().SetOrg().SetOrgname().SetLineage(lineage);
562 }
563 }
564 }
565 }
566 }
567
568
SetDiv(CRef<objects::CSeq_entry> entry,string div)569 void SetDiv (CRef<objects::CSeq_entry> entry, string div)
570 {
571 if (!entry) {
572 return;
573 }
574 if (entry->IsSeq()) {
575 NON_CONST_ITERATE (objects::CSeq_descr::Tdata, it, entry->SetSeq().SetDescr().Set()) {
576 if ((*it)->IsSource()) {
577 if (NStr::IsBlank(div)) {
578 (*it)->SetSource().SetOrg().SetOrgname().ResetDiv();
579 } else {
580 (*it)->SetSource().SetOrg().SetOrgname().SetDiv(div);
581 }
582 }
583 }
584 } else if (entry->IsSet()) {
585 NON_CONST_ITERATE (objects::CSeq_descr::Tdata, it, entry->SetSet().SetDescr().Set()) {
586 if ((*it)->IsSource()) {
587 if (NStr::IsBlank(div)) {
588 (*it)->SetSource().SetOrg().SetOrgname().ResetDiv();
589 } else {
590 (*it)->SetSource().SetOrg().SetOrgname().SetDiv(div);
591 }
592 }
593 }
594 }
595 }
596
597
SetOrigin(CRef<objects::CSeq_entry> entry,objects::CBioSource::TOrigin origin)598 void SetOrigin (CRef<objects::CSeq_entry> entry, objects::CBioSource::TOrigin origin)
599 {
600 if (!entry) {
601 return;
602 }
603 if (entry->IsSeq()) {
604 NON_CONST_ITERATE (objects::CSeq_descr::Tdata, it, entry->SetSeq().SetDescr().Set()) {
605 if ((*it)->IsSource()) {
606 (*it)->SetSource().SetOrigin(origin);
607 }
608 }
609 } else if (entry->IsSet()) {
610 NON_CONST_ITERATE (objects::CSeq_descr::Tdata, it, entry->SetSet().SetDescr().Set()) {
611 if ((*it)->IsSource()) {
612 (*it)->SetSource().SetOrigin(origin);
613 }
614 }
615 }
616 }
617
618
SetGcode(CRef<objects::CSeq_entry> entry,objects::COrgName::TGcode gcode)619 void SetGcode (CRef<objects::CSeq_entry> entry, objects::COrgName::TGcode gcode)
620 {
621 if (!entry) {
622 return;
623 }
624 if (entry->IsSeq()) {
625 NON_CONST_ITERATE (objects::CSeq_descr::Tdata, it, entry->SetSeq().SetDescr().Set()) {
626 if ((*it)->IsSource()) {
627 (*it)->SetSource().SetOrg().SetOrgname().SetGcode(gcode);
628 }
629 }
630 } else if (entry->IsSet()) {
631 NON_CONST_ITERATE (objects::CSeq_descr::Tdata, it, entry->SetSet().SetDescr().Set()) {
632 if ((*it)->IsSource()) {
633 (*it)->SetSource().SetOrg().SetOrgname().SetGcode(gcode);
634 }
635 }
636 }
637 }
638
639
SetMGcode(CRef<objects::CSeq_entry> entry,objects::COrgName::TGcode mgcode)640 void SetMGcode (CRef<objects::CSeq_entry> entry, objects::COrgName::TGcode mgcode)
641 {
642 if (!entry) {
643 return;
644 }
645 if (entry->IsSeq()) {
646 NON_CONST_ITERATE (objects::CSeq_descr::Tdata, it, entry->SetSeq().SetDescr().Set()) {
647 if ((*it)->IsSource()) {
648 (*it)->SetSource().SetOrg().SetOrgname().SetMgcode(mgcode);
649 }
650 }
651 } else if (entry->IsSet()) {
652 NON_CONST_ITERATE (objects::CSeq_descr::Tdata, it, entry->SetSet().SetDescr().Set()) {
653 if ((*it)->IsSource()) {
654 (*it)->SetSource().SetOrg().SetOrgname().SetMgcode(mgcode);
655 }
656 }
657 }
658 }
659
660
SetPGcode(CRef<objects::CSeq_entry> entry,objects::COrgName::TGcode pgcode)661 void SetPGcode (CRef<objects::CSeq_entry> entry, objects::COrgName::TGcode pgcode)
662 {
663 if (!entry) {
664 return;
665 }
666 if (entry->IsSeq()) {
667 NON_CONST_ITERATE (objects::CSeq_descr::Tdata, it, entry->SetSeq().SetDescr().Set()) {
668 if ((*it)->IsSource()) {
669 (*it)->SetSource().SetOrg().SetOrgname().SetPgcode(pgcode);
670 }
671 }
672 } else if (entry->IsSet()) {
673 NON_CONST_ITERATE (objects::CSeq_descr::Tdata, it, entry->SetSet().SetDescr().Set()) {
674 if ((*it)->IsSource()) {
675 (*it)->SetSource().SetOrg().SetOrgname().SetPgcode(pgcode);
676 }
677 }
678 }
679 }
680
681
ResetOrgname(CRef<objects::CSeq_entry> entry)682 void ResetOrgname (CRef<objects::CSeq_entry> entry)
683 {
684 if (!entry) {
685 return;
686 }
687 if (entry->IsSeq()) {
688 NON_CONST_ITERATE (objects::CSeq_descr::Tdata, it, entry->SetSeq().SetDescr().Set()) {
689 if ((*it)->IsSource()) {
690 (*it)->SetSource().SetOrg().ResetOrgname();
691 }
692 }
693 } else if (entry->IsSet()) {
694 NON_CONST_ITERATE (objects::CSeq_descr::Tdata, it, entry->SetSet().SetDescr().Set()) {
695 if ((*it)->IsSource()) {
696 (*it)->SetSource().SetOrg().ResetOrgname();
697 }
698 }
699 }
700 }
701
702
SetFocus(CRef<objects::CSeq_entry> entry)703 void SetFocus (CRef<objects::CSeq_entry> entry)
704 {
705 if (!entry) {
706 return;
707 }
708 if (entry->IsSeq()) {
709 NON_CONST_ITERATE (objects::CSeq_descr::Tdata, it, entry->SetSeq().SetDescr().Set()) {
710 if ((*it)->IsSource()) {
711 (*it)->SetSource().SetIs_focus();
712 }
713 }
714 } else if (entry->IsSet()) {
715 NON_CONST_ITERATE (objects::CSeq_descr::Tdata, it, entry->SetSet().SetDescr().Set()) {
716 if ((*it)->IsSource()) {
717 (*it)->SetSource().SetIs_focus();
718 }
719 }
720 }
721 }
722
723
ClearFocus(CRef<objects::CSeq_entry> entry)724 void ClearFocus (CRef<objects::CSeq_entry> entry)
725 {
726 if (!entry) {
727 return;
728 }
729 if (entry->IsSeq()) {
730 NON_CONST_ITERATE (objects::CSeq_descr::Tdata, it, entry->SetSeq().SetDescr().Set()) {
731 if ((*it)->IsSource()) {
732 (*it)->SetSource().ResetIs_focus();
733 }
734 }
735 } else if (entry->IsSet()) {
736 NON_CONST_ITERATE (objects::CSeq_descr::Tdata, it, entry->SetSet().SetDescr().Set()) {
737 if ((*it)->IsSource()) {
738 (*it)->SetSource().ResetIs_focus();
739 }
740 }
741 }
742 }
743
744
SetGenome(CRef<objects::CSeq_entry> entry,objects::CBioSource::TGenome genome)745 void SetGenome (CRef<objects::CSeq_entry> entry, objects::CBioSource::TGenome genome)
746 {
747 if (!entry) {
748 return;
749 }
750 if (entry->IsSeq()) {
751 NON_CONST_ITERATE (objects::CSeq_descr::Tdata, it, entry->SetSeq().SetDescr().Set()) {
752 if ((*it)->IsSource()) {
753 (*it)->SetSource().SetGenome(genome);
754 }
755 }
756 } else if (entry->IsSet()) {
757 NON_CONST_ITERATE (objects::CSeq_descr::Tdata, it, entry->SetSet().SetDescr().Set()) {
758 if ((*it)->IsSource()) {
759 (*it)->SetSource().SetGenome(genome);
760 }
761 }
762 }
763 }
764
765
SetSubSource(objects::CBioSource & src,objects::CSubSource::TSubtype subtype,string val)766 void SetSubSource (objects::CBioSource& src, objects::CSubSource::TSubtype subtype, string val)
767 {
768 if (NStr::IsBlank(val)) {
769 if (src.IsSetSubtype()) {
770 objects::CBioSource::TSubtype::iterator it = src.SetSubtype().begin();
771 while (it != src.SetSubtype().end()) {
772 if ((*it)->IsSetSubtype() && (*it)->GetSubtype() == subtype) {
773 it = src.SetSubtype().erase(it);
774 } else {
775 ++it;
776 }
777 }
778 }
779 } else {
780 CRef<objects::CSubSource> sub(new objects::CSubSource(subtype, val));
781 if (NStr::EqualNocase(val, "true")) {
782 sub->SetName("");
783 }
784 src.SetSubtype().push_back(sub);
785 }
786 }
787
788
SetSubSource(CRef<objects::CSeq_entry> entry,objects::CSubSource::TSubtype subtype,string val)789 void SetSubSource (CRef<objects::CSeq_entry> entry, objects::CSubSource::TSubtype subtype, string val)
790 {
791 if (!entry) {
792 return;
793 }
794 if (entry->IsSeq()) {
795 NON_CONST_ITERATE (objects::CSeq_descr::Tdata, it, entry->SetSeq().SetDescr().Set()) {
796 if ((*it)->IsSource()) {
797 SetSubSource((*it)->SetSource(), subtype, val);
798 }
799 }
800 } else if (entry->IsSet()) {
801 NON_CONST_ITERATE (objects::CSeq_descr::Tdata, it, entry->SetSet().SetDescr().Set()) {
802 if ((*it)->IsSource()) {
803 SetSubSource((*it)->SetSource(), subtype, val);
804 }
805 }
806 }
807 }
808
809
SetChromosome(objects::CBioSource & src,string chromosome)810 void SetChromosome (objects::CBioSource& src, string chromosome)
811 {
812 if (NStr::IsBlank(chromosome)) {
813 if (src.IsSetSubtype()) {
814 objects::CBioSource::TSubtype::iterator it = src.SetSubtype().begin();
815 while (it != src.SetSubtype().end()) {
816 if ((*it)->IsSetSubtype() && (*it)->GetSubtype() == objects::CSubSource::eSubtype_chromosome) {
817 it = src.SetSubtype().erase(it);
818 } else {
819 ++it;
820 }
821 }
822 }
823 } else {
824 CRef<objects::CSubSource> sub(new objects::CSubSource(objects::CSubSource::eSubtype_chromosome, chromosome));
825 src.SetSubtype().push_back(sub);
826 }
827 }
828
829
SetChromosome(CRef<objects::CSeq_entry> entry,string chromosome)830 void SetChromosome (CRef<objects::CSeq_entry> entry, string chromosome)
831 {
832 if (!entry) {
833 return;
834 }
835 if (entry->IsSeq()) {
836 NON_CONST_ITERATE (objects::CSeq_descr::Tdata, it, entry->SetSeq().SetDescr().Set()) {
837 if ((*it)->IsSource()) {
838 SetChromosome((*it)->SetSource(), chromosome);
839 }
840 }
841 } else if (entry->IsSet()) {
842 NON_CONST_ITERATE (objects::CSeq_descr::Tdata, it, entry->SetSet().SetDescr().Set()) {
843 if ((*it)->IsSource()) {
844 SetChromosome((*it)->SetSource(), chromosome);
845 }
846 }
847 }
848 }
849
850
SetTransgenic(objects::CBioSource & src,bool do_set)851 void SetTransgenic (objects::CBioSource& src, bool do_set)
852 {
853 if (do_set) {
854 CRef<objects::CSubSource> sub(new objects::CSubSource(objects::CSubSource::eSubtype_transgenic, ""));
855 src.SetSubtype().push_back(sub);
856 } else if (src.IsSetSubtype()) {
857 objects::CBioSource::TSubtype::iterator it = src.SetSubtype().begin();
858 while (it != src.SetSubtype().end()) {
859 if ((*it)->IsSetSubtype() && (*it)->GetSubtype() == objects::CSubSource::eSubtype_transgenic) {
860 it = src.SetSubtype().erase(it);
861 } else {
862 ++it;
863 }
864 }
865 }
866 }
867
868
SetTransgenic(CRef<objects::CSeq_entry> entry,bool do_set)869 void SetTransgenic (CRef<objects::CSeq_entry> entry, bool do_set)
870 {
871 if (!entry) {
872 return;
873 }
874 if (entry->IsSeq()) {
875 NON_CONST_ITERATE (objects::CSeq_descr::Tdata, it, entry->SetSeq().SetDescr().Set()) {
876 if ((*it)->IsSource()) {
877 SetTransgenic((*it)->SetSource(), do_set);
878 }
879 }
880 } else if (entry->IsSet()) {
881 NON_CONST_ITERATE (objects::CSeq_descr::Tdata, it, entry->SetSet().SetDescr().Set()) {
882 if ((*it)->IsSource()) {
883 SetTransgenic((*it)->SetSource(), do_set);
884 }
885 }
886 }
887 }
888
889
SetOrgMod(objects::CBioSource & src,objects::COrgMod::TSubtype subtype,string val)890 void SetOrgMod (objects::CBioSource& src, objects::COrgMod::TSubtype subtype, string val)
891 {
892 if (NStr::IsBlank(val)) {
893 if (src.IsSetOrg() && src.GetOrg().IsSetOrgname() && src.GetOrg().GetOrgname().IsSetMod()) {
894 objects::COrgName::TMod::iterator it = src.SetOrg().SetOrgname().SetMod().begin();
895 while (it != src.SetOrg().SetOrgname().SetMod().end()) {
896 if ((*it)->IsSetSubtype() && (*it)->GetSubtype() == subtype) {
897 it = src.SetOrg().SetOrgname().SetMod().erase(it);
898 } else {
899 ++it;
900 }
901 }
902 }
903 } else {
904 CRef<objects::COrgMod> sub(new objects::COrgMod(subtype, val));
905 src.SetOrg().SetOrgname().SetMod().push_back(sub);
906 }
907 }
908
909
SetOrgMod(CRef<objects::CSeq_entry> entry,objects::COrgMod::TSubtype subtype,string val)910 void SetOrgMod (CRef<objects::CSeq_entry> entry, objects::COrgMod::TSubtype subtype, string val)
911 {
912 if (!entry) {
913 return;
914 }
915 if (entry->IsSeq()) {
916 NON_CONST_ITERATE (objects::CSeq_descr::Tdata, it, entry->SetSeq().SetDescr().Set()) {
917 if ((*it)->IsSource()) {
918 SetOrgMod((*it)->SetSource(), subtype, val);
919 }
920 }
921 } else if (entry->IsSet()) {
922 NON_CONST_ITERATE (objects::CSeq_descr::Tdata, it, entry->SetSet().SetDescr().Set()) {
923 if ((*it)->IsSource()) {
924 SetOrgMod((*it)->SetSource(), subtype, val);
925 }
926 }
927 }
928 }
929
930
BuildGoodAuthor()931 CRef<objects::CAuthor> BuildGoodAuthor()
932 {
933 CRef<objects::CAuthor> author(new objects::CAuthor());
934 author->SetName().SetName().SetLast("Last");
935 author->SetName().SetName().SetFirst("First");
936 author->SetName().SetName().SetMiddle("M");
937 return author;
938 }
939
940
BuildGoodArticlePub()941 CRef<objects::CPub> BuildGoodArticlePub()
942 {
943 CRef<objects::CPub> pub(new objects::CPub());
944
945 CRef<objects::CCit_art::TTitle::C_E> art_title(new objects::CCit_art::TTitle::C_E());
946 art_title->SetName("article title");
947 pub->SetArticle().SetTitle().Set().push_back(art_title);
948 CRef<objects::CCit_jour::TTitle::C_E> journal_title(new objects::CCit_jour::TTitle::C_E());
949 journal_title->SetName("journal_title");
950 pub->SetArticle().SetFrom().SetJournal().SetTitle().Set().push_back(journal_title);
951 CRef<objects::CCit_jour::TTitle::C_E> iso_jta(new objects::CCit_jour::TTitle::C_E());
952 iso_jta->SetIso_jta("abbr");
953 pub->SetArticle().SetFrom().SetJournal().SetTitle().Set().push_back(iso_jta);
954 pub->SetArticle().SetAuthors().SetNames().SetStd().push_back(BuildGoodAuthor());
955 pub->SetArticle().SetFrom().SetJournal().SetImp().SetVolume("vol 1");
956 pub->SetArticle().SetFrom().SetJournal().SetImp().SetPages("14-32");
957 pub->SetArticle().SetFrom().SetJournal().SetImp().SetDate().SetStd().SetYear(2009);
958 return pub;
959 }
960
961
BuildGoodCitGenPub(CRef<objects::CAuthor> author,int serial_number)962 CRef<objects::CPub> BuildGoodCitGenPub(CRef<objects::CAuthor> author, int serial_number)
963 {
964 CRef<objects::CPub> pub(new objects::CPub());
965 if (!author) {
966 author = BuildGoodAuthor();
967 }
968 pub->SetGen().SetAuthors().SetNames().SetStd().push_back(author);
969 pub->SetGen().SetTitle("gen title");
970 pub->SetGen().SetDate().SetStd().SetYear(2009);
971 if (serial_number > -1) {
972 pub->SetGen().SetSerial_number(serial_number);
973 }
974 return pub;
975 }
976
977
BuildGoodCitSubPub()978 CRef<objects::CPub> BuildGoodCitSubPub()
979 {
980 CRef<objects::CPub> pub(new objects::CPub());
981 CRef<objects::CAuthor> author = BuildGoodAuthor();
982 pub->SetSub().SetAuthors().SetNames().SetStd().push_back(author);
983 pub->SetSub().SetAuthors().SetAffil().SetStd().SetAffil("A Major University");
984 pub->SetSub().SetAuthors().SetAffil().SetStd().SetSub("Maryland");
985 pub->SetSub().SetAuthors().SetAffil().SetStd().SetCountry("USA");
986 pub->SetSub().SetDate().SetStd().SetYear(2009);
987 return pub;
988 }
989
990
MakeSeqLong(objects::CBioseq & seq)991 void MakeSeqLong(objects::CBioseq& seq)
992 {
993 if (seq.SetInst().IsSetSeq_data()) {
994 if (seq.GetInst().GetSeq_data().IsIupacna()) {
995 seq.SetInst().SetSeq_data().SetIupacna().Set().clear();
996 for (int i = 0; i < 100; i++) {
997 seq.SetInst().SetSeq_data().SetIupacna().Set().append(
998 "AAAAATTTTTGGGGGCCCCCTTTTTAAAAATTTTTGGGGGCCCCCTTTTTAAAAATTTTTGGGGGCCCCCTTTTTAAAAATTTTTGGGGGCCCCCTTTTT");
999 }
1000 seq.SetInst().SetLength(10000);
1001 } else if (seq.GetInst().GetSeq_data().IsIupacaa()) {
1002 seq.SetInst().SetSeq_data().SetIupacaa().Set().clear();
1003 for (int i = 0; i < 100; i++) {
1004 seq.SetInst().SetSeq_data().SetIupacaa().Set().append(
1005 "MPRKTEINSLMPRKTEINSLMPRKTEINSLMPRKTEINSLMPRKTEINSLMPRKTEINSLMPRKTEINSLMPRKTEINSLMPRKTEINSLMPRKTEINSL");
1006 }
1007 seq.SetInst().SetLength(10000);
1008 }
1009 }
1010 }
1011
1012
SetBiomol(CRef<objects::CSeq_entry> entry,objects::CMolInfo::TBiomol biomol)1013 void SetBiomol (CRef<objects::CSeq_entry> entry, objects::CMolInfo::TBiomol biomol)
1014 {
1015 bool found = false;
1016
1017 NON_CONST_ITERATE (objects::CSeq_descr::Tdata, it, entry->SetSeq().SetDescr().Set()) {
1018 if ((*it)->IsMolinfo()) {
1019 (*it)->SetMolinfo().SetBiomol(biomol);
1020 found = true;
1021 }
1022 }
1023 if (!found) {
1024 CRef<objects::CSeqdesc> mdesc(new objects::CSeqdesc());
1025 mdesc->SetMolinfo().SetBiomol(biomol);
1026 entry->SetSeq().SetDescr().Set().push_back(mdesc);
1027 }
1028 }
1029
1030
SetTech(CRef<objects::CSeq_entry> entry,objects::CMolInfo::TTech tech)1031 void SetTech (CRef<objects::CSeq_entry> entry, objects::CMolInfo::TTech tech)
1032 {
1033 bool found = false;
1034
1035 NON_CONST_ITERATE (objects::CSeq_descr::Tdata, it, entry->SetSeq().SetDescr().Set()) {
1036 if ((*it)->IsMolinfo()) {
1037 (*it)->SetMolinfo().SetTech(tech);
1038 found = true;
1039 }
1040 }
1041 if (!found) {
1042 CRef<objects::CSeqdesc> mdesc(new objects::CSeqdesc());
1043 mdesc->SetMolinfo().SetTech(tech);
1044 entry->SetSeq().SetDescr().Set().push_back(mdesc);
1045 }
1046 }
1047
1048
SetCompleteness(CRef<objects::CSeq_entry> entry,objects::CMolInfo::TCompleteness completeness)1049 void SetCompleteness(CRef<objects::CSeq_entry> entry, objects::CMolInfo::TCompleteness completeness)
1050 {
1051 if (entry->IsSeq()) {
1052 bool found = false;
1053 NON_CONST_ITERATE (objects::CSeq_descr::Tdata, it, entry->SetSeq().SetDescr().Set()) {
1054 if ((*it)->IsMolinfo()) {
1055 (*it)->SetMolinfo().SetCompleteness (completeness);
1056 found = true;
1057 }
1058 }
1059 if (!found) {
1060 CRef<objects::CSeqdesc> mdesc(new objects::CSeqdesc());
1061 if (entry->GetSeq().IsAa()) {
1062 mdesc->SetMolinfo().SetBiomol(objects::CMolInfo::eBiomol_peptide);
1063 } else {
1064 mdesc->SetMolinfo().SetBiomol(objects::CMolInfo::eBiomol_genomic);
1065 }
1066 mdesc->SetMolinfo().SetCompleteness (completeness);
1067 entry->SetSeq().SetDescr().Set().push_back(mdesc);
1068 }
1069 }
1070 }
1071
1072
BuildGoodProtSeq(void)1073 CRef<objects::CSeq_entry> BuildGoodProtSeq(void)
1074 {
1075 CRef<objects::CSeq_entry> entry = BuildGoodSeq();
1076
1077 entry->SetSeq().SetInst().SetMol(objects::CSeq_inst::eMol_aa);
1078 entry->SetSeq().SetInst().SetSeq_data().SetIupacaa().Set("PRKTEIN");
1079 entry->SetSeq().SetInst().SetLength(7);
1080 NON_CONST_ITERATE (objects::CSeq_descr::Tdata, it, entry->SetSeq().SetDescr().Set()) {
1081 if ((*it)->IsMolinfo()) {
1082 (*it)->SetMolinfo().SetBiomol(objects::CMolInfo::eBiomol_peptide);
1083 }
1084 }
1085
1086 AddProtFeat (entry);
1087
1088 return entry;
1089 }
1090
1091
MakeProteinForGoodNucProtSet(string id)1092 CRef<objects::CSeq_entry> MakeProteinForGoodNucProtSet (string id)
1093 {
1094 // make protein
1095 CRef<objects::CBioseq> pseq(new objects::CBioseq());
1096 pseq->SetInst().SetMol(objects::CSeq_inst::eMol_aa);
1097 pseq->SetInst().SetRepr(objects::CSeq_inst::eRepr_raw);
1098 pseq->SetInst().SetSeq_data().SetIupacaa().Set("MPRKTEIN");
1099 pseq->SetInst().SetLength(8);
1100
1101 CRef<objects::CSeq_id> pid(new objects::CSeq_id());
1102 pid->SetLocal().SetStr (id);
1103 pseq->SetId().push_back(pid);
1104
1105 CRef<objects::CSeqdesc> mpdesc(new objects::CSeqdesc());
1106 mpdesc->SetMolinfo().SetBiomol(objects::CMolInfo::eBiomol_peptide);
1107 mpdesc->SetMolinfo().SetCompleteness(objects::CMolInfo::eCompleteness_complete);
1108 pseq->SetDescr().Set().push_back(mpdesc);
1109
1110 CRef<objects::CSeq_entry> pentry(new objects::CSeq_entry());
1111 pentry->SetSeq(*pseq);
1112
1113 CRef<objects::CSeq_feat> feat (new objects::CSeq_feat());
1114 feat->SetData().SetProt().SetName().push_back("fake protein name");
1115 feat->SetLocation().SetInt().SetId().SetLocal().SetStr(id);
1116 feat->SetLocation().SetInt().SetFrom(0);
1117 feat->SetLocation().SetInt().SetTo(7);
1118 AddFeat (feat, pentry);
1119
1120 return pentry;
1121 }
1122
1123
MakeCDSForGoodNucProtSet(const string & nuc_id,const string & prot_id)1124 CRef<objects::CSeq_feat> MakeCDSForGoodNucProtSet (const string& nuc_id, const string& prot_id)
1125 {
1126 CRef<objects::CSeq_feat> cds (new objects::CSeq_feat());
1127 cds->SetData().SetCdregion();
1128 cds->SetProduct().SetWhole().SetLocal().SetStr(prot_id);
1129 cds->SetLocation().SetInt().SetId().SetLocal().SetStr(nuc_id);
1130 cds->SetLocation().SetInt().SetFrom(0);
1131 cds->SetLocation().SetInt().SetTo(26);
1132 return cds;
1133 }
1134
1135
BuildGoodNucProtSet(void)1136 CRef<objects::CSeq_entry> BuildGoodNucProtSet(void)
1137 {
1138 CRef<objects::CBioseq_set> set(new objects::CBioseq_set());
1139 set->SetClass(objects::CBioseq_set::eClass_nuc_prot);
1140
1141 // make nucleotide
1142 CRef<objects::CBioseq> nseq(new objects::CBioseq());
1143 nseq->SetInst().SetMol(objects::CSeq_inst::eMol_dna);
1144 nseq->SetInst().SetRepr(objects::CSeq_inst::eRepr_raw);
1145 nseq->SetInst().SetSeq_data().SetIupacna().Set("ATGCCCAGAAAAACAGAGATAAACTAAGGGATGCCCAGAAAAACAGAGATAAACTAAGGG");
1146 nseq->SetInst().SetLength(60);
1147
1148 CRef<objects::CSeq_id> id(new objects::CSeq_id());
1149 id->SetLocal().SetStr ("nuc");
1150 nseq->SetId().push_back(id);
1151
1152 CRef<objects::CSeqdesc> mdesc(new objects::CSeqdesc());
1153 mdesc->SetMolinfo().SetBiomol(objects::CMolInfo::eBiomol_genomic);
1154 nseq->SetDescr().Set().push_back(mdesc);
1155
1156 CRef<objects::CSeq_entry> nentry(new objects::CSeq_entry());
1157 nentry->SetSeq(*nseq);
1158
1159 set->SetSeq_set().push_back(nentry);
1160
1161 // make protein
1162 CRef<objects::CSeq_entry> pentry = MakeProteinForGoodNucProtSet("prot");
1163
1164 set->SetSeq_set().push_back(pentry);
1165
1166 CRef<objects::CSeq_entry> set_entry(new objects::CSeq_entry());
1167 set_entry->SetSet(*set);
1168
1169 CRef<objects::CSeq_feat> cds = MakeCDSForGoodNucProtSet("nuc", "prot");
1170 AddFeat (cds, set_entry);
1171
1172 AddGoodSource (set_entry);
1173 AddGoodPub(set_entry);
1174 return set_entry;
1175 }
1176
1177
AdjustProtFeatForNucProtSet(CRef<objects::CSeq_entry> entry)1178 void AdjustProtFeatForNucProtSet(CRef<objects::CSeq_entry> entry)
1179 {
1180 CRef<objects::CSeq_feat> prot;
1181 CRef<objects::CSeq_entry> prot_seq;
1182
1183 if (!entry) {
1184 return;
1185 }
1186 if (entry->IsSeq()) {
1187 prot_seq = entry;
1188 prot = entry->SetSeq().SetAnnot().front()->SetData().SetFtable().front();
1189 } else if (entry->IsSet()) {
1190 prot_seq = entry->SetSet().SetSeq_set().back();
1191 prot = prot_seq->SetSeq().SetAnnot().front()->SetData().SetFtable().front();
1192 }
1193 if (prot && prot_seq) {
1194 prot->SetLocation().SetInt().SetTo(prot_seq->SetSeq().SetInst().SetLength() - 1);
1195 }
1196 }
1197
1198
SetNucProtSetProductName(CRef<objects::CSeq_entry> entry,string new_name)1199 void SetNucProtSetProductName (CRef<objects::CSeq_entry> entry, string new_name)
1200 {
1201 CRef<objects::CSeq_feat> prot;
1202 CRef<objects::CSeq_entry> prot_seq;
1203
1204 if (!entry) {
1205 return;
1206 }
1207 if (entry->IsSeq()) {
1208 prot_seq = entry;
1209 prot = entry->SetSeq().SetAnnot().front()->SetData().SetFtable().front();
1210 } else if (entry->IsSet()) {
1211 prot_seq = entry->SetSet().SetSeq_set().back();
1212 prot = prot_seq->SetSeq().SetAnnot().front()->SetData().SetFtable().front();
1213 }
1214 if (prot) {
1215 if (prot->SetData().SetProt().SetName().size() > 0) {
1216 prot->SetData().SetProt().SetName().pop_front();
1217 }
1218 prot->SetData().SetProt().SetName().push_front(new_name);
1219 }
1220 }
1221
1222
GetCDSFromGoodNucProtSet(CRef<objects::CSeq_entry> entry)1223 CRef<objects::CSeq_feat> GetCDSFromGoodNucProtSet (CRef<objects::CSeq_entry> entry)
1224 {
1225 return entry->SetSet().SetAnnot().front()->SetData().SetFtable().front();
1226 }
1227
1228
GetNucleotideSequenceFromGoodNucProtSet(CRef<objects::CSeq_entry> entry)1229 CRef<objects::CSeq_entry> GetNucleotideSequenceFromGoodNucProtSet (CRef<objects::CSeq_entry> entry)
1230 {
1231 return entry->SetSet().SetSeq_set().front();
1232 }
1233
1234
GetProteinSequenceFromGoodNucProtSet(CRef<objects::CSeq_entry> entry)1235 CRef<objects::CSeq_entry> GetProteinSequenceFromGoodNucProtSet (CRef<objects::CSeq_entry> entry)
1236 {
1237 return entry->SetSet().SetSeq_set().back();
1238 }
1239
1240
GetProtFeatFromGoodNucProtSet(CRef<objects::CSeq_entry> entry)1241 CRef<objects::CSeq_feat> GetProtFeatFromGoodNucProtSet (CRef<objects::CSeq_entry> entry)
1242 {
1243 CRef<objects::CSeq_entry> pentry = GetProteinSequenceFromGoodNucProtSet(entry);
1244 return pentry->SetSeq().SetAnnot().front()->SetData().SetFtable().front();
1245 }
1246
1247
RetranslateCdsForNucProtSet(CRef<objects::CSeq_entry> entry,objects::CScope & scope)1248 void RetranslateCdsForNucProtSet (CRef<objects::CSeq_entry> entry, objects::CScope &scope)
1249 {
1250 CRef<objects::CSeq_feat> cds = GetCDSFromGoodNucProtSet(entry);
1251 CRef<objects::CBioseq> bioseq = CSeqTranslator::TranslateToProtein(*cds, scope);
1252 CRef<objects::CSeq_entry> pentry = GetProteinSequenceFromGoodNucProtSet(entry);
1253 pentry->SetSeq().SetInst().Assign(bioseq->GetInst());
1254 AdjustProtFeatForNucProtSet (entry);
1255 }
1256
1257
SetProteinPartial(CRef<CSeq_entry> pentry,bool partial5,bool partial3)1258 void SetProteinPartial(CRef<CSeq_entry> pentry, bool partial5, bool partial3)
1259 {
1260 CRef<CSeq_feat> prot = pentry->SetAnnot().front()->SetData().SetFtable().front();
1261 prot->SetPartial(partial5 || partial3);
1262 prot->SetLocation().SetPartialStart(partial5, objects::eExtreme_Biological);
1263 prot->SetLocation().SetPartialStop(partial3, objects::eExtreme_Biological);
1264
1265 // molinfo completeness
1266 if (partial5 && partial3) {
1267 SetCompleteness (pentry, objects::CMolInfo::eCompleteness_no_ends);
1268 } else if (partial5) {
1269 SetCompleteness (pentry, objects::CMolInfo::eCompleteness_no_left);
1270 } else if (partial3) {
1271 SetCompleteness (pentry, objects::CMolInfo::eCompleteness_no_right);
1272 } else {
1273 SetCompleteness (pentry, objects::CMolInfo::eCompleteness_complete);
1274 }
1275 }
1276
1277
SetNucProtSetPartials(CRef<objects::CSeq_entry> entry,bool partial5,bool partial3)1278 void SetNucProtSetPartials (CRef<objects::CSeq_entry> entry, bool partial5, bool partial3)
1279 {
1280 // partials for CDS
1281 CRef<objects::CSeq_feat> cds = GetCDSFromGoodNucProtSet(entry);
1282 cds->SetPartial(partial5 || partial3);
1283 cds->SetLocation().SetPartialStart(partial5, objects::eExtreme_Biological);
1284 cds->SetLocation().SetPartialStop(partial3, objects::eExtreme_Biological);
1285
1286 CRef<objects::CSeq_entry> pentry = GetProteinSequenceFromGoodNucProtSet(entry);
1287 SetProteinPartial(pentry, partial5, partial3);
1288 }
1289
1290
ChangeNucProtSetProteinId(CRef<objects::CSeq_entry> entry,CRef<objects::CSeq_id> id)1291 void ChangeNucProtSetProteinId (CRef<objects::CSeq_entry> entry, CRef<objects::CSeq_id> id)
1292 {
1293 CRef<objects::CSeq_entry> pseq = GetProteinSequenceFromGoodNucProtSet(entry);
1294 pseq->SetSeq().SetId().front()->Assign(*id);
1295
1296 CRef<objects::CSeq_feat> pfeat = GetProtFeatFromGoodNucProtSet(entry);
1297 pfeat->SetLocation().SetInt().SetId().Assign(*id);
1298
1299 CRef<objects::CSeq_feat> cds = GetCDSFromGoodNucProtSet(entry);
1300 cds->SetProduct().SetWhole().Assign(*id);
1301 }
1302
1303
ChangeNucProtSetNucId(CRef<objects::CSeq_entry> entry,CRef<objects::CSeq_id> id)1304 void ChangeNucProtSetNucId (CRef<objects::CSeq_entry> entry, CRef<objects::CSeq_id> id)
1305 {
1306 CRef<objects::CSeq_entry> nseq = GetNucleotideSequenceFromGoodNucProtSet(entry);
1307 nseq->SetSeq().SetId().front()->Assign(*id);
1308
1309 CRef<objects::CSeq_feat> cds = GetCDSFromGoodNucProtSet(entry);
1310 if(cds->GetLocation().IsInt()) {
1311 cds->SetLocation().SetInt().SetId().Assign(*id);
1312 } else if (cds->GetLocation().IsMix()) {
1313 cds->SetLocation().SetMix().Set().front()->SetInt().SetId().Assign(*id);
1314 cds->SetLocation().SetMix().Set().back()->SetInt().SetId().Assign(*id);
1315 }
1316 }
1317
1318
MakeNucProtSet3Partial(CRef<objects::CSeq_entry> entry)1319 void MakeNucProtSet3Partial (CRef<objects::CSeq_entry> entry)
1320 {
1321 CRef<objects::CSeq_feat> cds = GetCDSFromGoodNucProtSet(entry);
1322 cds->SetLocation().SetInt().SetTo(59);
1323 cds->SetLocation().SetPartialStop(true, objects::eExtreme_Biological);
1324 cds->SetPartial(true);
1325 CRef<objects::CSeq_entry> nuc_seq = entry->SetSet().SetSeq_set().front();
1326 nuc_seq->SetSeq().SetInst().SetSeq_data().SetIupacna().Set("ATGCCCAGAAAAACAGAGATAAACAAAGGGATGCCCAGAAAAACAGAGATAAACAAAGGG");
1327 CRef<objects::CSeq_entry> prot_seq = entry->SetSet().SetSeq_set().back();
1328 prot_seq->SetSeq().SetInst().SetSeq_data().SetIupacaa().Set("MPRKTEINKGMPRKTEINKG");
1329 prot_seq->SetSeq().SetInst().SetLength(20);
1330 SetCompleteness (prot_seq, objects::CMolInfo::eCompleteness_no_right);
1331 CRef<objects::CSeq_feat> prot = prot_seq->SetSeq().SetAnnot().front()->SetData().SetFtable().front();
1332 prot->SetLocation().SetInt().SetTo(19);
1333 prot->SetLocation().SetPartialStop(true, objects::eExtreme_Biological);
1334 prot->SetPartial(true);
1335
1336 }
1337
1338
ChangeId(CRef<objects::CSeq_annot> annot,CRef<objects::CSeq_id> id)1339 void ChangeId(CRef<objects::CSeq_annot> annot, CRef<objects::CSeq_id> id)
1340 {
1341 if (annot && annot->IsFtable()) {
1342 objects::CSeq_annot::C_Data::TFtable::iterator it = annot->SetData().SetFtable().begin();
1343 while (it != annot->SetData().SetFtable().end()) {
1344 (*it)->SetLocation().SetInt().SetId().Assign(*id);
1345 ++it;
1346 }
1347 }
1348 }
1349
1350
ChangeProductId(CRef<objects::CSeq_annot> annot,CRef<objects::CSeq_id> id)1351 void ChangeProductId(CRef<objects::CSeq_annot> annot, CRef<objects::CSeq_id> id)
1352 {
1353 if (annot && annot->IsFtable()) {
1354 objects::CSeq_annot::C_Data::TFtable::iterator it = annot->SetData().SetFtable().begin();
1355 while (it != annot->SetData().SetFtable().end()) {
1356 if ((*it)->IsSetProduct()) {
1357 (*it)->SetProduct().SetWhole().Assign(*id);
1358 }
1359 ++it;
1360 }
1361 }
1362 }
1363
1364
ChangeNucId(CRef<objects::CSeq_entry> np_set,CRef<objects::CSeq_id> id)1365 void ChangeNucId(CRef<objects::CSeq_entry> np_set, CRef<objects::CSeq_id> id)
1366 {
1367 if (!np_set || !np_set->IsSet()) {
1368 return;
1369 }
1370
1371 CRef<objects::CSeq_entry> nuc_entry = np_set->SetSet().SetSeq_set().front();
1372
1373 nuc_entry->SetSeq().SetId().front()->Assign(*id);
1374 if (nuc_entry->SetSeq().IsSetAnnot()) {
1375 NON_CONST_ITERATE(objects::CSeq_entry::TAnnot, annot_it, nuc_entry->SetSeq().SetAnnot()) {
1376 ChangeId (*annot_it, id);
1377 }
1378 }
1379 if (np_set->SetSet().IsSetAnnot()) {
1380 NON_CONST_ITERATE(objects::CSeq_entry::TAnnot, annot_it, np_set->SetSet().SetAnnot()) {
1381 ChangeId (*annot_it, id);
1382 }
1383 }
1384 }
1385
1386
ChangeProtId(CRef<objects::CSeq_entry> np_set,CRef<objects::CSeq_id> id)1387 void ChangeProtId(CRef<objects::CSeq_entry> np_set, CRef<objects::CSeq_id> id)
1388 {
1389 if (!np_set || !np_set->IsSet()) {
1390 return;
1391 }
1392
1393 CRef<objects::CSeq_entry> prot_entry = np_set->SetSet().SetSeq_set().back();
1394 CRef<objects::CSeq_feat> cds = GetCDSFromGoodNucProtSet(np_set);
1395
1396 prot_entry->SetSeq().SetId().front()->Assign(*id);
1397 EDIT_EACH_SEQANNOT_ON_BIOSEQ (annot_it, prot_entry->SetSeq()) {
1398 ChangeId (*annot_it, id);
1399 }
1400
1401 EDIT_EACH_SEQANNOT_ON_SEQSET (annot_it, np_set->SetSet()) {
1402 ChangeProductId (*annot_it, id);
1403 }
1404 }
1405
1406
BuildRefSeqId(void)1407 CRef<objects::CSeq_id> BuildRefSeqId(void)
1408 {
1409 CRef<objects::CSeq_id> id(new objects::CSeq_id());
1410 id->SetOther().SetAccession("NC_123456");
1411 return id;
1412 }
1413
1414
ChangeId(CRef<objects::CSeq_entry> entry,CRef<objects::CSeq_id> id)1415 void ChangeId(CRef<objects::CSeq_entry> entry, CRef<objects::CSeq_id> id)
1416 {
1417 if (entry->IsSeq()) {
1418 entry->SetSeq().SetId().front()->Assign(*id);
1419 if (entry->SetSeq().IsSetAnnot()) {
1420 objects::CBioseq::TAnnot::iterator annot_it = entry->SetSeq().SetAnnot().begin();
1421 while (annot_it != entry->SetSeq().SetAnnot().end()) {
1422 if ((*annot_it)->IsFtable()) {
1423 objects::CSeq_annot::C_Data::TFtable::iterator it = (*annot_it)->SetData().SetFtable().begin();
1424 while (it != (*annot_it)->SetData().SetFtable().end()) {
1425 (*it)->SetLocation().SetId(*id);
1426 ++it;
1427 }
1428 }
1429 ++annot_it;
1430 }
1431 }
1432 }
1433 }
1434
1435
ChangeId(CRef<objects::CSeq_annot> annot,string suffix)1436 void ChangeId(CRef<objects::CSeq_annot> annot, string suffix)
1437 {
1438 if (annot && annot->IsFtable()) {
1439 objects::CSeq_annot::C_Data::TFtable::iterator it = annot->SetData().SetFtable().begin();
1440 while (it != annot->SetData().SetFtable().end()) {
1441 (*it)->SetLocation().SetInt().SetId().SetLocal().SetStr().append(suffix);
1442 if ((*it)->IsSetProduct()) {
1443 (*it)->SetProduct().SetWhole().SetLocal().SetStr().append(suffix);
1444 }
1445 ++it;
1446 }
1447 }
1448 }
1449
1450
ChangeId(CRef<objects::CSeq_entry> entry,string suffix)1451 void ChangeId(CRef<objects::CSeq_entry> entry, string suffix)
1452 {
1453 if (entry->IsSeq()) {
1454 entry->SetSeq().SetId().front()->SetLocal().SetStr().append(suffix);
1455 if (entry->SetSeq().IsSetAnnot()) {
1456 objects::CBioseq::TAnnot::iterator annot_it = entry->SetSeq().SetAnnot().begin();
1457 while (annot_it != entry->SetSeq().SetAnnot().end()) {
1458 ChangeId(*annot_it, suffix);
1459 ++annot_it;
1460 }
1461 }
1462 } else if (entry->IsSet()) {
1463 objects::CBioseq_set::TSeq_set::iterator it = entry->SetSet().SetSeq_set().begin();
1464 while (it != entry->SetSet().SetSeq_set().end()) {
1465 ChangeId(*it, suffix);
1466 ++it;
1467 }
1468 if (entry->SetSet().IsSetAnnot()) {
1469 objects::CBioseq_set::TAnnot::iterator annot_it = entry->SetSet().SetAnnot().begin();
1470 while (annot_it != entry->SetSet().SetAnnot().end()) {
1471 ChangeId(*annot_it, suffix);
1472 ++annot_it;
1473 }
1474 }
1475 }
1476 }
1477
1478
BuildGenProdSetNucProtSet(CRef<objects::CSeq_id> nuc_id,CRef<objects::CSeq_id> prot_id)1479 CRef<objects::CSeq_entry> BuildGenProdSetNucProtSet (CRef<objects::CSeq_id> nuc_id, CRef<objects::CSeq_id> prot_id)
1480 {
1481 CRef<objects::CSeq_entry> np = BuildGoodNucProtSet();
1482 CRef<objects::CSeq_entry> nuc = GetNucleotideSequenceFromGoodNucProtSet(np);
1483 nuc->SetSeq().SetInst().SetSeq_data().SetIupacna().Set("ATGCCCAGAAAAACAGAGATAAACTAA");
1484 nuc->SetSeq().SetInst().SetLength(27);
1485 nuc->SetSeq().SetInst().SetMol(objects::CSeq_inst::eMol_rna);
1486 SetBiomol(nuc, objects::CMolInfo::eBiomol_mRNA);
1487 if (nuc_id) {
1488 ChangeNucProtSetNucId(np, nuc_id);
1489 }
1490 if (prot_id) {
1491 ChangeNucProtSetProteinId(np, prot_id);
1492 }
1493 return np;
1494 }
1495
1496
MakemRNAForCDS(CRef<objects::CSeq_feat> feat)1497 CRef<objects::CSeq_feat> MakemRNAForCDS (CRef<objects::CSeq_feat> feat)
1498 {
1499 CRef<objects::CSeq_feat> mrna(new objects::CSeq_feat);
1500 mrna->SetData().SetRna().SetType(objects::CRNA_ref::eType_mRNA);
1501 mrna->SetLocation().Assign(feat->GetLocation());
1502 return mrna;
1503 }
1504
1505
BuildGoodGenProdSet()1506 CRef<objects::CSeq_entry> BuildGoodGenProdSet()
1507 {
1508 CRef<objects::CSeq_entry> entry(new objects::CSeq_entry());
1509 entry->SetSet().SetClass(objects::CBioseq_set::eClass_gen_prod_set);
1510 CRef<objects::CSeq_entry> contig = BuildGoodSeq();
1511 contig->SetSeq().SetInst().SetSeq_data().SetIupacna().Set("ATGCCCAGAAAAACAGAGATAAACTAAGGGATGCCCAGAAAAACAGAGATAAACTAAGGG");
1512 contig->SetSeq().SetInst().SetLength(60);
1513 entry->SetSet().SetSeq_set().push_back (contig);
1514 CRef<objects::CSeq_id> nuc_id(new objects::CSeq_id());
1515 nuc_id->SetLocal().SetStr("nuc");
1516 CRef<objects::CSeq_id> prot_id(new objects::CSeq_id());
1517 prot_id->SetLocal().SetStr("prot");
1518 CRef<objects::CSeq_entry> np = BuildGenProdSetNucProtSet(nuc_id, prot_id);
1519 entry->SetSet().SetSeq_set().push_back (np);
1520
1521 CRef<objects::CSeq_feat> cds(new objects::CSeq_feat());
1522 cds->Assign (*(GetCDSFromGoodNucProtSet(np)));
1523 cds->SetLocation().SetInt().SetId().SetLocal().SetStr("good");
1524 AddFeat (cds, contig);
1525 CRef<objects::CSeq_feat> mrna = MakemRNAForCDS(cds);
1526 mrna->SetProduct().SetWhole().Assign(*nuc_id);
1527 AddFeat (mrna, contig);
1528
1529 return entry;
1530 }
1531
1532
GetGenomicFromGenProdSet(CRef<objects::CSeq_entry> entry)1533 CRef<objects::CSeq_entry> GetGenomicFromGenProdSet (CRef<objects::CSeq_entry> entry)
1534 {
1535 return entry->SetSet().SetSeq_set().front();
1536 }
1537
1538
GetmRNAFromGenProdSet(CRef<objects::CSeq_entry> entry)1539 CRef<objects::CSeq_feat> GetmRNAFromGenProdSet(CRef<objects::CSeq_entry> entry)
1540 {
1541 CRef<objects::CSeq_entry> genomic = GetGenomicFromGenProdSet(entry);
1542 CRef<objects::CSeq_feat> mrna = genomic->SetSeq().SetAnnot().front()->SetData().SetFtable().back();
1543 return mrna;
1544 }
1545
1546
GetNucProtSetFromGenProdSet(CRef<objects::CSeq_entry> entry)1547 CRef<objects::CSeq_entry> GetNucProtSetFromGenProdSet(CRef<objects::CSeq_entry> entry)
1548 {
1549 return entry->SetSet().SetSeq_set().back();
1550 }
1551
1552
GetCDSFromGenProdSet(CRef<objects::CSeq_entry> entry)1553 CRef<objects::CSeq_feat> GetCDSFromGenProdSet (CRef<objects::CSeq_entry> entry)
1554 {
1555 CRef<objects::CSeq_entry> genomic = GetGenomicFromGenProdSet(entry);
1556 CRef<objects::CSeq_feat> cds = genomic->SetSeq().SetAnnot().front()->SetData().SetFtable().front();
1557 return cds;
1558 }
1559
1560
RevComp(objects::CBioseq & bioseq)1561 void RevComp (objects::CBioseq& bioseq)
1562 {
1563 if (!bioseq.IsNa() || !bioseq.IsSetInst()
1564 || !bioseq.GetInst().IsSetSeq_data()
1565 || !bioseq.GetInst().GetSeq_data().IsIupacna()) {
1566 return;
1567 }
1568 string seq = bioseq.GetInst().GetSeq_data().GetIupacna().Get();
1569 string new_seq = "";
1570 string::iterator sit = seq.end();
1571 while (sit != seq.begin()) {
1572 --sit;
1573 string new_ch = "";
1574 new_ch += *sit;
1575 if (NStr::Equal(new_ch, "A")) {
1576 new_ch = "T";
1577 } else if (NStr::Equal(new_ch, "T")) {
1578 new_ch = "A";
1579 } else if (NStr::Equal(new_ch, "G")) {
1580 new_ch = "C";
1581 } else if (NStr::Equal(new_ch, "C")) {
1582 new_ch = "G";
1583 }
1584 new_seq.append(new_ch);
1585 }
1586
1587 bioseq.SetInst().SetSeq_data().SetIupacna().Set(new_seq);
1588 size_t len = bioseq.GetLength();
1589 if (bioseq.IsSetAnnot()) {
1590 EDIT_EACH_SEQFEAT_ON_SEQANNOT (feat_it, *(bioseq.SetAnnot().front())) {
1591 TSeqPos new_from = len - (*feat_it)->GetLocation().GetInt().GetTo() - 1;
1592 TSeqPos new_to = len - (*feat_it)->GetLocation().GetInt().GetFrom() - 1;
1593 (*feat_it)->SetLocation().SetInt().SetFrom(new_from);
1594 (*feat_it)->SetLocation().SetInt().SetTo(new_to);
1595 if ((*feat_it)->GetLocation().GetInt().IsSetStrand()
1596 && (*feat_it)->GetLocation().GetInt().GetStrand() == objects::eNa_strand_minus) {
1597 (*feat_it)->SetLocation().SetInt().SetStrand(objects::eNa_strand_plus);
1598 } else {
1599 (*feat_it)->SetLocation().SetInt().SetStrand(objects::eNa_strand_minus);
1600 }
1601 }
1602 }
1603 }
1604
1605
RevComp(objects::CSeq_loc & loc,size_t len)1606 void RevComp (objects::CSeq_loc& loc, size_t len)
1607 {
1608 if (loc.IsInt()) {
1609 TSeqPos new_from = len - loc.GetInt().GetTo() - 1;
1610 TSeqPos new_to = len - loc.GetInt().GetFrom() - 1;
1611 loc.SetInt().SetFrom(new_from);
1612 loc.SetInt().SetTo(new_to);
1613 if (loc.GetInt().IsSetStrand()
1614 && loc.GetInt().GetStrand() == eNa_strand_minus) {
1615 loc.SetInt().SetStrand(eNa_strand_plus);
1616 } else {
1617 loc.SetInt().SetStrand(eNa_strand_minus);
1618 }
1619 } else if (loc.IsMix()) {
1620 NON_CONST_ITERATE (objects::CSeq_loc_mix::Tdata, it, loc.SetMix().Set()) {
1621 RevComp (**it, len);
1622 }
1623 }
1624 }
1625
1626
RevComp(CRef<objects::CSeq_entry> entry)1627 void RevComp (CRef<objects::CSeq_entry> entry)
1628 {
1629 if (entry->IsSeq()) {
1630 RevComp(entry->SetSeq());
1631 } else if (entry->IsSet()) {
1632 if (entry->GetSet().IsSetClass()
1633 && entry->GetSet().GetClass() == objects::CBioseq_set::eClass_nuc_prot) {
1634 RevComp(entry->SetSet().SetSeq_set().front());
1635 size_t len = entry->GetSet().GetSeq_set().front()->GetSeq().GetLength();
1636 EDIT_EACH_SEQFEAT_ON_SEQANNOT (feat_it, *(entry->SetSet().SetAnnot().front())) {
1637 RevComp ((*feat_it)->SetLocation(), len);
1638 }
1639 }
1640 }
1641 }
1642
1643
BuildGoodDeltaSeq(void)1644 CRef<objects::CSeq_entry> BuildGoodDeltaSeq(void)
1645 {
1646 CRef<objects::CSeq_entry> entry = BuildGoodSeq();
1647
1648 entry->SetSeq().SetInst().ResetSeq_data();
1649 entry->SetSeq().SetInst().SetRepr(objects::CSeq_inst::eRepr_delta);
1650 entry->SetSeq().SetInst().SetExt().SetDelta().AddLiteral("ATGATGATGCCC", objects::CSeq_inst::eMol_dna);
1651 CRef<objects::CDelta_seq> gap_seg(new objects::CDelta_seq());
1652 gap_seg->SetLiteral().SetSeq_data().SetGap();
1653 gap_seg->SetLiteral().SetLength(10);
1654 entry->SetSeq().SetInst().SetExt().SetDelta().Set().push_back(gap_seg);
1655 entry->SetSeq().SetInst().SetExt().SetDelta().AddLiteral("CCCATGATGATG", objects::CSeq_inst::eMol_dna);
1656 entry->SetSeq().SetInst().SetLength(34);
1657
1658 return entry;
1659 }
1660
1661
RemoveDeltaSeqGaps(CRef<objects::CSeq_entry> entry)1662 void RemoveDeltaSeqGaps(CRef<objects::CSeq_entry> entry)
1663 {
1664 objects::CDelta_ext::Tdata::iterator seg_it = entry->SetSeq().SetInst().SetExt().SetDelta().Set().begin();
1665 while (seg_it != entry->SetSeq().SetInst().SetExt().SetDelta().Set().end()) {
1666 if ((*seg_it)->IsLiteral()
1667 && (!(*seg_it)->GetLiteral().IsSetSeq_data()
1668 || (*seg_it)->GetLiteral().GetSeq_data().IsGap())) {
1669 TSeqPos len = entry->SetSeq().SetInst().GetLength();
1670 len -= (*seg_it)->GetLiteral().GetLength();
1671 seg_it = entry->SetSeq().SetInst().SetExt().SetDelta().Set().erase(seg_it);
1672 entry->SetSeq().SetInst().SetLength(len);
1673 } else {
1674 ++seg_it;
1675 }
1676 }
1677 }
1678
1679
AddToDeltaSeq(CRef<objects::CSeq_entry> entry,string seq)1680 void AddToDeltaSeq(CRef<objects::CSeq_entry> entry, string seq)
1681 {
1682 size_t orig_len = entry->GetSeq().GetLength();
1683 size_t add_len = seq.length();
1684
1685 CRef<objects::CDelta_seq> gap_seg(new objects::CDelta_seq());
1686 gap_seg->SetLiteral().SetSeq_data().SetGap();
1687 gap_seg->SetLiteral().SetLength(10);
1688 entry->SetSeq().SetInst().SetExt().SetDelta().Set().push_back(gap_seg);
1689 entry->SetSeq().SetInst().SetExt().SetDelta().AddLiteral(seq, objects::CSeq_inst::eMol_dna);
1690 entry->SetSeq().SetInst().SetLength(orig_len + 10 + add_len);
1691 }
1692
1693
BuildSegSetPart(string id_str)1694 CRef<objects::CSeq_entry> BuildSegSetPart(string id_str)
1695 {
1696 CRef<objects::CSeq_entry> part(new objects::CSeq_entry());
1697 part->SetSeq().SetInst().SetMol(objects::CSeq_inst::eMol_dna);
1698 part->SetSeq().SetInst().SetRepr(objects::CSeq_inst::eRepr_raw);
1699 part->SetSeq().SetInst().SetSeq_data().SetIupacna().Set("AATTGGCCAAAATTGGCCAAAATTGGCCAAAATTGGCCAAAATTGGCCAAAATTGGCCAA");
1700 part->SetSeq().SetInst().SetLength(60);
1701 CRef<objects::CSeq_id> id(new objects::CSeq_id(id_str));
1702 part->SetSeq().SetId().push_back(id);
1703 SetBiomol(part, objects::CMolInfo::eBiomol_genomic);
1704 return part;
1705 }
1706
1707
BuildGoodSegSet(void)1708 CRef<objects::CSeq_entry> BuildGoodSegSet(void)
1709 {
1710 CRef<objects::CSeq_entry> segset(new objects::CSeq_entry());
1711 segset->SetSet().SetClass(objects::CBioseq_set::eClass_segset);
1712
1713 CRef<objects::CSeq_entry> seg_seq(new objects::CSeq_entry());
1714 seg_seq->SetSeq().SetInst().SetMol(objects::CSeq_inst::eMol_dna);
1715 seg_seq->SetSeq().SetInst().SetRepr(objects::CSeq_inst::eRepr_seg);
1716
1717 CRef<objects::CSeq_loc> loc1(new objects::CSeq_loc());
1718 loc1->SetWhole().SetLocal().SetStr("part1");
1719 CRef<objects::CSeq_loc> loc2(new objects::CSeq_loc());
1720 loc2->SetWhole().SetLocal().SetStr("part2");
1721 CRef<objects::CSeq_loc> loc3(new objects::CSeq_loc());
1722 loc3->SetWhole().SetLocal().SetStr("part3");
1723
1724 seg_seq->SetSeq().SetInst().SetExt().SetSeg().Set().push_back(loc1);
1725 seg_seq->SetSeq().SetInst().SetExt().SetSeg().Set().push_back(loc2);
1726 seg_seq->SetSeq().SetInst().SetExt().SetSeg().Set().push_back(loc3);
1727 seg_seq->SetSeq().SetInst().SetLength(180);
1728
1729 CRef<objects::CSeq_id> id(new objects::CSeq_id());
1730 id->SetLocal().SetStr ("master");
1731 seg_seq->SetSeq().SetId().push_back(id);
1732 seg_seq->SetSeq().SetInst().SetLength(180);
1733 SetBiomol(seg_seq, objects::CMolInfo::eBiomol_genomic);
1734
1735 segset->SetSet().SetSeq_set().push_back(seg_seq);
1736
1737 // create parts set
1738 CRef<objects::CSeq_entry> parts_set(new objects::CSeq_entry());
1739 parts_set->SetSet().SetClass(objects::CBioseq_set::eClass_parts);
1740 parts_set->SetSet().SetSeq_set().push_back(BuildSegSetPart("lcl|part1"));
1741 parts_set->SetSet().SetSeq_set().push_back(BuildSegSetPart("lcl|part2"));
1742 parts_set->SetSet().SetSeq_set().push_back(BuildSegSetPart("lcl|part3"));
1743
1744 segset->SetSet().SetSeq_set().push_back(parts_set);
1745
1746 // CRef<objects::CSeqdesc> pdesc(new objects::CSeqdesc());
1747 // CRef<objects::CPub> pub(new objects::CPub());
1748 // pub->SetPmid((objects::CPub::TPmid)1);
1749 // pdesc->SetPub().SetPub().Set().push_back(pub);
1750 // segset->SetDescr().Set().push_back(pdesc);
1751 AddGoodPub(segset);
1752 CRef<objects::CSeqdesc> odesc(new objects::CSeqdesc());
1753 odesc->SetSource().SetOrg().SetTaxname("Sebaea microphylla");
1754 odesc->SetSource().SetOrg().SetOrgname().SetLineage("some lineage");
1755 CRef<objects::CDbtag> taxon_id(new objects::CDbtag());
1756 taxon_id->SetDb("taxon");
1757 taxon_id->SetTag().SetId(592768);
1758 odesc->SetSource().SetOrg().SetDb().push_back(taxon_id);
1759 CRef<objects::CSubSource> subsrc(new objects::CSubSource());
1760 subsrc->SetSubtype(objects::CSubSource::eSubtype_chromosome);
1761 subsrc->SetName("1");
1762 odesc->SetSource().SetSubtype().push_back(subsrc);
1763 segset->SetDescr().Set().push_back(odesc);
1764
1765 return segset;
1766 }
1767
1768
BuildGoodEcoSet()1769 CRef<objects::CSeq_entry> BuildGoodEcoSet()
1770 {
1771 CRef<objects::CSeq_entry> entry(new objects::CSeq_entry());
1772 entry->SetSet().SetClass(objects::CBioseq_set::eClass_eco_set);
1773 CRef<objects::CSeq_entry> seq1 = BuildGoodSeq();
1774 ChangeId(seq1, "1");
1775 CRef<objects::CSeq_entry> seq2 = BuildGoodSeq();
1776 ChangeId(seq2, "2");
1777 CRef<objects::CSeq_entry> seq3 = BuildGoodSeq();
1778 ChangeId(seq3, "3");
1779 entry->SetSet().SetSeq_set().push_back(seq1);
1780 entry->SetSet().SetSeq_set().push_back(seq2);
1781 entry->SetSet().SetSeq_set().push_back(seq3);
1782
1783 CRef<objects::CSeqdesc> desc(new objects::CSeqdesc());
1784 desc->SetTitle("popset title");
1785 entry->SetSet().SetDescr().Set().push_back(desc);
1786
1787 return entry;
1788 }
1789
1790
BuildGoodEcoSetWithAlign(size_t front_insert)1791 CRef<objects::CSeq_entry> BuildGoodEcoSetWithAlign(size_t front_insert)
1792 {
1793 CRef<CSeq_entry> entry = BuildGoodEcoSet();
1794
1795 CRef<objects::CSeq_align> align(new CSeq_align());
1796 align->SetType(objects::CSeq_align::eType_global);
1797 align->SetDim(entry->GetSet().GetSeq_set().size());
1798 size_t offset = 0;
1799 for (auto& s : entry->SetSet().SetSeq_set()) {
1800 CRef<CSeq_id> id(new CSeq_id());
1801 id->Assign(*(s->GetSeq().GetId().front()));
1802 align->SetSegs().SetDenseg().SetIds().push_back(id);
1803 if (offset > 0) {
1804 const string& orig = s->SetSeq().SetInst().SetSeq_data().SetIupacna().Set();
1805 size_t orig_len = s->GetSeq().GetInst().GetLength();
1806 string add = "";
1807 for (auto i = (size_t)0; i < offset; i++) {
1808 add += "A";
1809 }
1810 s->SetSeq().SetInst().SetSeq_data().SetIupacna().Set(add + orig);
1811 s->SetSeq().SetInst().SetLength(orig_len + offset);
1812 }
1813 align->SetSegs().SetDenseg().SetStarts().push_back(offset);
1814 offset += front_insert;
1815 }
1816 align->SetSegs().SetDenseg().SetNumseg(1);
1817 align->SetSegs().SetDenseg().SetLens().push_back(entry->GetSet().GetSeq_set().front()->GetSeq().GetInst().GetLength());
1818 align->SetSegs().SetDenseg().SetDim(3);
1819
1820 CRef<CSeq_annot> annot(new CSeq_annot());
1821 annot->SetData().SetAlign().push_back(align);
1822 entry->SetSet().SetAnnot().push_back(annot);
1823 return entry;
1824 }
1825
1826
1827 // assumes that sequence has been reverse-complemented
ReverseAlignmentStrand(CDense_seg & denseg,size_t pos,size_t seq_len)1828 void ReverseAlignmentStrand(CDense_seg& denseg, size_t pos, size_t seq_len)
1829 {
1830 // prepopulate the strand array if not already present
1831 auto num_pieces = denseg.GetDim() * denseg.GetNumseg();
1832 if (!denseg.IsSetStrands()) {
1833 for (auto i = 0; i < num_pieces; i++) {
1834 denseg.SetStrands().push_back(eNa_strand_plus);
1835 }
1836 } else if (denseg.GetStrands().size() < num_pieces) {
1837 for (auto i = denseg.GetStrands().size(); i < num_pieces; i++) {
1838 denseg.SetStrands().push_back(eNa_strand_plus);
1839 }
1840 }
1841 for (auto i = 0; i < denseg.GetNumseg(); i++) {
1842 auto offset = i * denseg.GetDim() + pos;
1843 auto orig = denseg.GetStarts()[offset];
1844 if (orig > -1) {
1845 denseg.SetStarts()[offset] = seq_len - orig - denseg.GetLens()[i];
1846 }
1847 if (denseg.GetStrands()[offset] == eNa_strand_minus) {
1848 denseg.SetStrands()[offset] = eNa_strand_plus;
1849 } else {
1850 denseg.SetStrands()[offset] = eNa_strand_minus;
1851 }
1852 }
1853 }
1854
1855
BuildGoodAlign()1856 CRef<objects::CSeq_align> BuildGoodAlign()
1857 {
1858 CRef<objects::CSeq_align> align(new objects::CSeq_align());
1859 CRef<objects::CSeq_id> id1(new objects::CSeq_id());
1860 id1->SetGenbank().SetAccession("FJ375734.2");
1861 id1->SetGenbank().SetVersion(2);
1862 CRef<objects::CSeq_id> id2(new objects::CSeq_id());
1863 id2->SetGenbank().SetAccession("FJ375735.2");
1864 id2->SetGenbank().SetVersion(2);
1865 align->SetDim(2);
1866 align->SetType(objects::CSeq_align::eType_global);
1867 align->SetSegs().SetDenseg().SetIds().push_back(id1);
1868 align->SetSegs().SetDenseg().SetIds().push_back(id2);
1869 align->SetSegs().SetDenseg().SetDim(2);
1870 align->SetSegs().SetDenseg().SetStarts().push_back(0);
1871 align->SetSegs().SetDenseg().SetStarts().push_back(0);
1872 align->SetSegs().SetDenseg().SetNumseg(1);
1873 align->SetSegs().SetDenseg().SetLens().push_back(812);
1874
1875 return align;
1876 }
1877
1878
BuildGoodGraphAnnot(string id)1879 CRef<objects::CSeq_annot> BuildGoodGraphAnnot(string id)
1880 {
1881 CRef<objects::CSeq_graph> graph(new objects::CSeq_graph());
1882 graph->SetLoc().SetInt().SetFrom(0);
1883 graph->SetLoc().SetInt().SetTo(10);
1884 graph->SetLoc().SetInt().SetId().SetLocal().SetStr(id);
1885
1886 CRef<objects::CSeq_annot> annot(new objects::CSeq_annot());
1887 annot->SetData().SetGraph().push_back(graph);
1888
1889 return annot;
1890 }
1891
1892
RemoveDescriptorType(CRef<objects::CSeq_entry> entry,objects::CSeqdesc::E_Choice desc_choice)1893 void RemoveDescriptorType (CRef<objects::CSeq_entry> entry, objects::CSeqdesc::E_Choice desc_choice)
1894 {
1895 EDIT_EACH_DESCRIPTOR_ON_SEQENTRY (dit, *entry) {
1896 if ((*dit)->Which() == desc_choice) {
1897 ERASE_DESCRIPTOR_ON_SEQENTRY (dit, *entry);
1898 }
1899 }
1900 }
1901
1902
BuildtRNA(CRef<objects::CSeq_id> id)1903 CRef<objects::CSeq_feat> BuildtRNA(CRef<objects::CSeq_id> id)
1904 {
1905 CRef<objects::CSeq_feat> feat(new objects::CSeq_feat());
1906 feat->SetLocation().SetInt().SetId().Assign(*id);
1907 feat->SetLocation().SetInt().SetFrom(0);
1908 feat->SetLocation().SetInt().SetTo(10);
1909
1910 feat->SetData().SetRna().SetType(CRNA_ref::eType_tRNA);
1911 feat->SetData().SetRna().SetExt().SetTRNA().SetAa().SetIupacaa('N');
1912 feat->SetData().SetRna().SetExt().SetTRNA().SetAnticodon().SetInt().SetId().Assign(*id);
1913 feat->SetData().SetRna().SetExt().SetTRNA().SetAnticodon().SetInt().SetFrom(11);
1914 feat->SetData().SetRna().SetExt().SetTRNA().SetAnticodon().SetInt().SetTo(13);
1915
1916 return feat;
1917 }
1918
1919
BuildGoodtRNA(CRef<objects::CSeq_id> id)1920 CRef<objects::CSeq_feat> BuildGoodtRNA(CRef<objects::CSeq_id> id)
1921 {
1922 CRef<objects::CSeq_feat> trna = BuildtRNA(id);
1923 trna->SetData().SetRna().SetExt().SetTRNA().SetAnticodon().SetInt().SetFrom(8);
1924 trna->SetData().SetRna().SetExt().SetTRNA().SetAnticodon().SetInt().SetTo(10);
1925 trna->SetData().SetRna().SetExt().SetTRNA().SetAa().SetIupacaa('F');
1926 return trna;
1927 }
1928
1929
MakeMixLoc(CRef<objects::CSeq_id> id)1930 CRef<objects::CSeq_loc> MakeMixLoc (CRef<objects::CSeq_id> id)
1931 {
1932 CRef<objects::CSeq_loc> loc1(new objects::CSeq_loc());
1933 loc1->SetInt().SetFrom(0);
1934 loc1->SetInt().SetTo(15);
1935 loc1->SetInt().SetId().Assign(*id);
1936 CRef<objects::CSeq_loc> loc2(new objects::CSeq_loc());
1937 loc2->SetInt().SetFrom(46);
1938 loc2->SetInt().SetTo(56);
1939 loc2->SetInt().SetId().Assign(*id);
1940 CRef<objects::CSeq_loc> mixloc(new objects::CSeq_loc());
1941 mixloc->SetMix().Set().push_back(loc1);
1942 mixloc->SetMix().Set().push_back(loc2);
1943 return mixloc;
1944 }
1945
1946
MakeIntronForMixLoc(CRef<objects::CSeq_id> id)1947 CRef<objects::CSeq_feat> MakeIntronForMixLoc (CRef<objects::CSeq_id> id)
1948 {
1949 CRef<objects::CSeq_feat> intron (new objects::CSeq_feat());
1950 intron->SetData().SetImp().SetKey("intron");
1951 intron->SetLocation().SetInt().SetFrom(16);
1952 intron->SetLocation().SetInt().SetTo(45);
1953 intron->SetLocation().SetInt().SetId().Assign(*id);
1954 return intron;
1955 }
1956
1957
SetSpliceForMixLoc(objects::CBioseq & seq)1958 void SetSpliceForMixLoc (objects::CBioseq& seq)
1959 {
1960 seq.SetInst().SetSeq_data().SetIupacna().Set()[16] = 'G';
1961 seq.SetInst().SetSeq_data().SetIupacna().Set()[17] = 'T';
1962 seq.SetInst().SetSeq_data().SetIupacna().Set()[44] = 'A';
1963 seq.SetInst().SetSeq_data().SetIupacna().Set()[45] = 'G';
1964 }
1965
1966
MakeGeneForFeature(CRef<objects::CSeq_feat> feat)1967 CRef<objects::CSeq_feat> MakeGeneForFeature (CRef<objects::CSeq_feat> feat)
1968 {
1969 CRef<objects::CSeq_feat> gene(new objects::CSeq_feat());
1970 gene->SetData().SetGene().SetLocus("gene locus");
1971 gene->SetLocation().SetInt().SetId().Assign(*(feat->GetLocation().GetId()));
1972 gene->SetLocation().SetInt().SetStrand(feat->GetLocation().GetStrand());
1973 gene->SetLocation().SetInt().SetFrom(feat->GetLocation().GetStart(objects::eExtreme_Positional));
1974 gene->SetLocation().SetInt().SetTo(feat->GetLocation().GetStop(objects::eExtreme_Positional));
1975 gene->SetLocation().SetPartialStart(feat->GetLocation().IsPartialStart(objects::eExtreme_Positional), objects::eExtreme_Positional);
1976 gene->SetLocation().SetPartialStop(feat->GetLocation().IsPartialStop(objects::eExtreme_Positional), objects::eExtreme_Positional);
1977 if (feat->IsSetPartial() && feat->GetPartial()) {
1978 gene->SetPartial(true);
1979 }
1980 return gene;
1981 }
1982
1983
AddGoodImpFeat(CRef<objects::CSeq_entry> entry,string key)1984 CRef<objects::CSeq_feat> AddGoodImpFeat (CRef<objects::CSeq_entry> entry, string key)
1985 {
1986 CRef<objects::CSeq_feat> imp_feat = AddMiscFeature (entry, 10);
1987 imp_feat->SetData().SetImp().SetKey(key);
1988 if (NStr::Equal(key, "conflict")) {
1989 imp_feat->AddQualifier("citation", "1");
1990 } else if (NStr::Equal(key, "intron")) {
1991 entry->SetSeq().SetInst().SetSeq_data().SetIupacna().Set()[0] = 'G';
1992 entry->SetSeq().SetInst().SetSeq_data().SetIupacna().Set()[1] = 'T';
1993 entry->SetSeq().SetInst().SetSeq_data().SetIupacna().Set()[9] = 'A';
1994 entry->SetSeq().SetInst().SetSeq_data().SetIupacna().Set()[10] = 'G';
1995 } else if (NStr::Equal(key, "misc_binding") || NStr::Equal(key, "protein_bind")) {
1996 imp_feat->AddQualifier("bound_moiety", "foo");
1997 } else if (NStr::Equal(key, "modified_base")) {
1998 imp_feat->AddQualifier("mod_base", "foo");
1999 } else if (NStr::Equal(key, "old_sequence")) {
2000 imp_feat->AddQualifier("citation", "1");
2001 } else if (NStr::Equal(key, "operon")) {
2002 imp_feat->AddQualifier("operon", "foo");
2003 } else if (NStr::Equal(key, "polyA_site")) {
2004 imp_feat->SetLocation().SetPnt().SetId().SetLocal().SetStr("good");
2005 imp_feat->SetLocation().SetPnt().SetPoint(5);
2006 } else if (NStr::Equal(key, "source")) {
2007 imp_feat->AddQualifier("organism", "foo");
2008 }
2009 return imp_feat;
2010 }
2011
2012 // helper classes for TraverseAndRunTestCases
2013 namespace {
2014
2015 // This just accumulates all the files in the path
2016 struct SFileRememberer
2017 {
operator ()__anon69a043f40111::SFileRememberer2018 void operator()( const CDirEntry & dir_entry ) {
2019 m_filesFound.push_back(CFile(dir_entry));
2020 }
2021
2022 vector<CFile> m_filesFound;
2023 };
2024
2025 // a simple function object that extracts the
2026 // first of a pair. (Unfortunately, "select1st" is not part of
2027 // the STL standard so it can't be relied upon to exist)
2028 template<typename Pair>
2029 struct SFirstOfPair
2030 {
operator ()__anon69a043f40111::SFirstOfPair2031 typename Pair::first_type operator()( const Pair & a_pair ) const
2032 {
2033 return a_pair.first;
2034 }
2035 };
2036 }
2037
TraverseAndRunTestCases(ITestRunner * pTestRunner,CDir dirWithTestCases,const set<string> & setOfRequiredSuffixes,const set<string> & setOfOptionalSuffixes,const set<string> & setOfIgnoredSuffixes,TTraverseAndRunTestCasesFlags fFlags)2038 void TraverseAndRunTestCases(
2039 ITestRunner *pTestRunner,
2040 CDir dirWithTestCases,
2041 const set<string> & setOfRequiredSuffixes,
2042 const set<string> & setOfOptionalSuffixes,
2043 const set<string> & setOfIgnoredSuffixes,
2044 TTraverseAndRunTestCasesFlags fFlags )
2045 {
2046 if( ! pTestRunner ) {
2047 NCBI_USER_THROW_FMT("NULL pTestRunner");
2048 }
2049 if( ! dirWithTestCases.Exists() ) {
2050 pTestRunner->OnError("Top-level test-cases dir not found: " + dirWithTestCases.GetPath() );
2051 return;
2052 }
2053 if( ! dirWithTestCases.IsDir() ) {
2054 pTestRunner->OnError("Top-level test-cases dir is actually not a dir: " + dirWithTestCases.GetPath() );
2055 return;
2056 }
2057
2058 const vector<string> kEmptyVectorOfStrings;
2059
2060 SFileRememberer fileRememberer;
2061 FindFilesInDir(
2062 dirWithTestCases,
2063 kEmptyVectorOfStrings,
2064 kEmptyVectorOfStrings,
2065 fileRememberer,
2066 fFF_File | fFF_Recursive );
2067
2068 // this is what we search for to see if there is a hidden directory
2069 // or file anywhere along the path.
2070 const string kHiddenSubstring = CDirEntry::GetPathSeparator() + string(".svn") + CDirEntry::GetPathSeparator();
2071
2072 typedef map<string, ITestRunner::TMapSuffixToFile> TMapTestNameToItsFiles;
2073 TMapTestNameToItsFiles mapTestNameToItsFiles;
2074 // this loop loads mapTestNameToItsFiles
2075 ITERATE( vector<CFile>, file_it, fileRememberer.m_filesFound ) {
2076 const string sFileName = file_it->GetName();
2077 const string sFileAbsPath = CDirEntry::CreateAbsolutePath(file_it->GetPath());
2078
2079 // hidden folders or files of any kind are silently ignored
2080 if( NStr::Find(sFileAbsPath, kHiddenSubstring) != NPOS ) {
2081 continue;
2082 }
2083
2084 if( ! (fFlags & fTraverseAndRunTestCasesFlags_DoNOTIgnoreREADMEFiles) &&
2085 NStr::StartsWith(sFileName, "README") )
2086 {
2087 // if requested, silently ignore files starting with README
2088 continue;
2089 }
2090
2091 // extract out testname and suffix
2092 string sTestName;
2093 string sSuffix;
2094 NStr::SplitInTwo(sFileName, ".", sTestName, sSuffix);
2095 if( sTestName.empty() || sSuffix.empty() ) {
2096 pTestRunner->OnError("Bad file name: " + file_it->GetPath());
2097 continue;
2098 }
2099
2100 if( setOfIgnoredSuffixes.find(sSuffix) != setOfIgnoredSuffixes.end() ) {
2101 // silently ignores suffixes requested to be ignored by the user
2102 continue;
2103 }
2104
2105 // load this entry, with error if not inserted
2106 const bool bWasInserted =
2107 mapTestNameToItsFiles[sTestName].insert(make_pair(sSuffix, *file_it)).second;
2108 if( ! bWasInserted ) {
2109 pTestRunner->OnError(
2110 "File with same name appears multiple times in different dirs: " +
2111 file_it->GetPath() );
2112 continue;
2113 }
2114 }
2115
2116 // sanity check all tests and remove the unusable ones
2117 ERASE_ITERATE(TMapTestNameToItsFiles, test_it, mapTestNameToItsFiles) {
2118 const string & sTestName = test_it->first;
2119 const ITestRunner::TMapSuffixToFile & mapSuffixToFile =
2120 test_it->second;
2121
2122 // get the keys (that is, the suffixes) of the map
2123 set<string> setOfAllSuffixes;
2124 transform(mapSuffixToFile.begin(), mapSuffixToFile.end(),
2125 inserter(setOfAllSuffixes, setOfAllSuffixes.begin()),
2126 SFirstOfPair<ITestRunner::TMapSuffixToFile::value_type>() );
2127
2128 // get the non-required suffixes that were used
2129 set<string> setOfNonRequiredSuffixes;
2130 set_difference( setOfAllSuffixes.begin(), setOfAllSuffixes.end(),
2131 setOfRequiredSuffixes.begin(), setOfRequiredSuffixes.end(),
2132 inserter(setOfNonRequiredSuffixes, setOfNonRequiredSuffixes.begin() ) );
2133
2134 // make sure it has all required suffixes
2135 // (the set of suffixes should have shrunk by exactly the number of required
2136 // suffixes on the set_difference just above)
2137 const size_t szNumOfSuffixes = setOfAllSuffixes.size();
2138 const size_t szNumOfNonRequiredSuffixes = setOfNonRequiredSuffixes.size();
2139 if( (szNumOfSuffixes - szNumOfNonRequiredSuffixes) != setOfRequiredSuffixes.size() )
2140 {
2141 pTestRunner->OnError("Skipping this test because it's missing some files: " + sTestName);
2142 mapTestNameToItsFiles.erase(test_it);
2143 continue;
2144 }
2145
2146 // all non-required suffixes should be in the optional set
2147 if( ! includes( setOfOptionalSuffixes.begin(), setOfOptionalSuffixes.end(),
2148 setOfNonRequiredSuffixes.begin(), setOfNonRequiredSuffixes.end() ) )
2149 {
2150 pTestRunner->OnError("Skipping this test because it has unexpected suffix(es): " + sTestName);
2151 mapTestNameToItsFiles.erase(test_it);
2152 continue;
2153 }
2154 }
2155
2156 // there should be at least one test to run
2157 if( mapTestNameToItsFiles.empty() ) {
2158 pTestRunner->OnError("There are no tests to run");
2159 return;
2160 }
2161
2162 // Now, actually run the tests
2163 ITERATE(TMapTestNameToItsFiles, test_it, mapTestNameToItsFiles) {
2164 const string & sTestName = test_it->first;
2165 const ITestRunner::TMapSuffixToFile & mapSuffixToFile =
2166 test_it->second;
2167
2168 cerr << "Running test: " << sTestName << endl;
2169 pTestRunner->RunTest(sTestName, mapSuffixToFile);
2170 }
2171 }
2172
2173 END_SCOPE(unit_test_util)
2174 END_SCOPE(objects)
2175 END_NCBI_SCOPE
2176