1 /* $Id: write_util.cpp 637282 2021-09-09 19:27:15Z ivanov $
2 * ===========================================================================
3 *
4 * PUBLIC DOMAIN NOTICE
5 * National Center for Biotechnology Information
6 *
7 * This software/database is a "United States Government Work" under the
8 * terms of the United States Copyright Act. It was written as part of
9 * the author's official duties as a United States Government employee and
10 * thus cannot be copyrighted. This software/database is freely available
11 * to the public for use. The National Library of Medicine and the U.S.
12 * Government have not placed any restriction on its use or reproduction.
13 *
14 * Although all reasonable efforts have been taken to ensure the accuracy
15 * and reliability of the software and data, the NLM and the U.S.
16 * Government do not and cannot warrant the performance or results that
17 * may be obtained by using this software or data. The NLM and the U.S.
18 * Government disclaim all warranties, express or implied, including
19 * warranties of performance, merchantability or fitness for any particular
20 * purpose.
21 *
22 * Please cite the author in any work or product based on this material.
23 *
24 * ===========================================================================
25 *
26 * Authors: Frank Ludwig
27 *
28 * File Description: Write gff file
29 *
30 */
31
32 #include <ncbi_pch.hpp>
33 #include <objmgr/seqdesc_ci.hpp>
34 #include <objects/seq/Seq_annot.hpp>
35 #include <objects/seq/Annot_descr.hpp>
36 #include <objects/seq/seqport_util.hpp>
37 #include <objects/general/User_object.hpp>
38 #include <objects/general/Object_id.hpp>
39 #include <objects/general/Dbtag.hpp>
40 #include <objects/seqfeat/BioSource.hpp>
41 #include <objects/seqfeat/OrgName.hpp>
42 #include <objects/seqfeat/OrgMod.hpp>
43 #include <objects/seqfeat/SubSource.hpp>
44 #include <objtools/writers/write_util.hpp>
45 #include <objtools/writers/feature_context.hpp>
46 #include <objmgr/util/sequence.hpp>
47
48 #include <objtools/writers/writer_exception.hpp>
49 #include <objtools/writers/genbank_id_resolve.hpp>
50
51 BEGIN_NCBI_SCOPE
52 USING_SCOPE(objects);
53
54 // ----------------------------------------------------------------------------
GetDescriptor(const CSeq_annot & annot,const string & strType)55 CRef<CUser_object> CWriteUtil::GetDescriptor(
56 const CSeq_annot& annot,
57 const string& strType )
58 // ----------------------------------------------------------------------------
59 {
60 CRef< CUser_object > pUser;
61 if (!annot.IsSetDesc()) {
62 return pUser;
63 }
64
65 const list<CRef<CAnnotdesc> > descriptors = annot.GetDesc().Get();
66 list<CRef<CAnnotdesc> >::const_iterator it;
67 for (it = descriptors.begin(); it != descriptors.end(); ++it) {
68 if (!(*it)->IsUser()) {
69 continue;
70 }
71 const CUser_object& user = (*it)->GetUser();
72 if (user.GetType().GetStr() == strType) {
73 pUser.Reset(new CUser_object);
74 pUser->Assign(user);
75 return pUser;
76 }
77 }
78 return pUser;
79 }
80
81 // ----------------------------------------------------------------------------
GetGenomeString(const CBioSource & bs,string & genome_str)82 bool CWriteUtil::GetGenomeString(
83 const CBioSource& bs,
84 string& genome_str )
85 // ----------------------------------------------------------------------------
86 {
87 #define EMIT(str) { genome_str = str; return true; }
88
89 if (!bs.IsSetGenome()) {
90 return false;
91 }
92 switch (bs.GetGenome()) {
93 default:
94 return false;
95 case CBioSource::eGenome_apicoplast: EMIT("apicoplast");
96 case CBioSource::eGenome_chloroplast: EMIT("chloroplast");
97 case CBioSource::eGenome_chromatophore: EMIT("chromatophore");
98 case CBioSource::eGenome_chromoplast: EMIT("chromoplast");
99 case CBioSource::eGenome_chromosome: EMIT("chromosome");
100 case CBioSource::eGenome_cyanelle: EMIT("cyanelle");
101 case CBioSource::eGenome_endogenous_virus: EMIT("endogenous_virus");
102 case CBioSource::eGenome_extrachrom: EMIT("extrachrom");
103 case CBioSource::eGenome_genomic: EMIT("genomic");
104 case CBioSource::eGenome_hydrogenosome: EMIT("hydrogenosome");
105 case CBioSource::eGenome_insertion_seq: EMIT("insertion_seq");
106 case CBioSource::eGenome_kinetoplast: EMIT("kinetoplast");
107 case CBioSource::eGenome_leucoplast: EMIT("leucoplast");
108 case CBioSource::eGenome_macronuclear: EMIT("macronuclear");
109 case CBioSource::eGenome_mitochondrion: EMIT("mitochondrion");
110 case CBioSource::eGenome_nucleomorph: EMIT("nucleomorph");
111 case CBioSource::eGenome_plasmid: EMIT("plasmid");
112 case CBioSource::eGenome_plastid: EMIT("plastid");
113 case CBioSource::eGenome_proplastid: EMIT("proplastid");
114 case CBioSource::eGenome_proviral: EMIT("proviral");
115 case CBioSource::eGenome_transposon: EMIT("transposon");
116 case CBioSource::eGenome_unknown: EMIT("unknown");
117 case CBioSource::eGenome_virion: EMIT("virion");
118 }
119 }
120 #undef EMIT
121
122 // ----------------------------------------------------------------------------
GetIdType(const CSeq_id & seqId,string & idType)123 bool CWriteUtil::GetIdType(
124 const CSeq_id& seqId,
125 string& idType )
126 // ----------------------------------------------------------------------------
127 {
128 #define EMIT(str) { idType = str; return true; }
129 switch(seqId.Which()) {
130 default:
131 idType = CSeq_id::SelectionName(seqId.Which());
132 NStr::ToUpper(idType);
133 break;
134
135 case CSeq_id::e_Local: EMIT("Local");
136
137 case CSeq_id::e_Gibbsq:
138 case CSeq_id::e_Gibbmt:
139 case CSeq_id::e_Giim:
140 case CSeq_id::e_Gi: EMIT("GenInfo");
141
142 case CSeq_id::e_Genbank: EMIT("Genbank");
143 case CSeq_id::e_Swissprot: EMIT("SwissProt");
144 case CSeq_id::e_Patent: EMIT("Patent");
145 case CSeq_id::e_Other: EMIT("RefSeq");
146 case CSeq_id::e_Ddbj: EMIT("DDBJ");
147 case CSeq_id::e_Embl: EMIT("EMBL");
148 case CSeq_id::e_Pir: EMIT("PIR");
149 case CSeq_id::e_Prf: EMIT("PRF");
150 case CSeq_id::e_Pdb: EMIT("PDB");
151 case CSeq_id::e_Tpg: EMIT("tpg");
152 case CSeq_id::e_Tpe: EMIT("tpe");
153 case CSeq_id::e_Tpd: EMIT("tpd");
154 case CSeq_id::e_Gpipe: EMIT("gpipe");
155 case CSeq_id::e_Named_annot_track: EMIT("NADB");
156 case CSeq_id::e_General:
157 EMIT(seqId.GetGeneral().GetDb());
158 }
159 #undef EMIT
160 return true;
161 }
162
163 // ----------------------------------------------------------------------------
GetIdType(CBioseq_Handle bsh,string & idType)164 bool CWriteUtil::GetIdType(
165 CBioseq_Handle bsh,
166 string& idType )
167 // ----------------------------------------------------------------------------
168 {
169 if (!bsh) {
170 return false;
171 }
172 CSeq_id_Handle best_idh;
173 try {
174 best_idh = sequence::GetId(bsh, sequence::eGetId_Best);
175 if ( !best_idh ) {
176 best_idh = sequence::GetId(bsh, sequence::eGetId_Canonical);
177 }
178 }
179 catch(...) {
180 return false;
181 }
182 return GetIdType(*best_idh.GetSeqId(), idType);
183 }
184
185 // ----------------------------------------------------------------------------
GetOrgModSubType(const COrgMod & mod,string & subtype,string & subname)186 bool CWriteUtil::GetOrgModSubType(
187 const COrgMod& mod,
188 string& subtype,
189 string& subname)
190 // ----------------------------------------------------------------------------
191 {
192 if (!mod.IsSetSubtype() || !mod.IsSetSubname()) {
193 return false;
194 }
195 subtype = COrgMod::GetSubtypeName(mod.GetSubtype());
196 subname = mod.GetSubname();
197 return true;
198 }
199
200 // ----------------------------------------------------------------------------
GetSubSourceSubType(const CSubSource & sub,string & subtype,string & subname)201 bool CWriteUtil::GetSubSourceSubType(
202 const CSubSource& sub,
203 string& subtype,
204 string& subname)
205 // ----------------------------------------------------------------------------
206 {
207 #define EMIT(str) { subname = str; return true; }
208 if (!sub.IsSetSubtype() || !sub.IsSetName()) {
209 return false;
210 }
211 subtype = CSubSource::GetSubtypeName(sub.GetSubtype());
212
213 switch (sub.GetSubtype()) {
214 default:
215 if (sub.GetName().empty()) {
216 EMIT("indeterminate");
217 }
218 EMIT(sub.GetName());
219 case CSubSource::eSubtype_environmental_sample:
220 case CSubSource::eSubtype_germline:
221 case CSubSource::eSubtype_transgenic:
222 case CSubSource::eSubtype_rearranged:
223 case CSubSource::eSubtype_metagenomic:
224 EMIT("true");
225 }
226 return true;
227 #undef EMIT
228 }
229
230 // ----------------------------------------------------------------------------
GetAaName(const CCode_break & cb,string & aaName)231 bool CWriteUtil::GetAaName(
232 const CCode_break& cb,
233 string& aaName )
234 // ----------------------------------------------------------------------------
235 {
236 static const char* AANames[] = {
237 "---", "Ala", "Asx", "Cys", "Asp", "Glu", "Phe", "Gly", "His", "Ile",
238 "Lys", "Leu", "Met", "Asn", "Pro", "Gln", "Arg", "Ser", "Thr", "Val",
239 "Trp", "Other", "Tyr", "Glx", "Sec", "TERM", "Pyl"
240 };
241 static const char* other = "OTHER";
242
243 unsigned char aa(0);
244 switch (cb.GetAa().Which()) {
245 case CCode_break::C_Aa::e_Ncbieaa:
246 aa = cb.GetAa().GetNcbieaa();
247 aa = CSeqportUtil::GetMapToIndex(
248 CSeq_data::e_Ncbieaa, CSeq_data::e_Ncbistdaa, aa);
249 break;
250 case CCode_break::C_Aa::e_Ncbi8aa:
251 aa = cb.GetAa().GetNcbi8aa();
252 break;
253 case CCode_break::C_Aa::e_Ncbistdaa:
254 aa = cb.GetAa().GetNcbistdaa();
255 break;
256 default:
257 return false;
258 }
259 aaName = ((aa < sizeof(AANames)/sizeof(*AANames)) ? AANames[aa] : other);
260 return true;
261 }
262
263 // ----------------------------------------------------------------------------
GetCodeBreak(const CCode_break & cb,string & cbString)264 bool CWriteUtil::GetCodeBreak(
265 const CCode_break& cb,
266 string& cbString )
267 // ----------------------------------------------------------------------------
268 {
269 string cb_str = ("(pos:");
270 if ( cb.IsSetLoc() ) {
271 const CCode_break::TLoc& loc = cb.GetLoc();
272 switch( loc.Which() ) {
273 default: {
274 cb_str += NStr::IntToString( loc.GetStart(eExtreme_Positional)+1 );
275 cb_str += "..";
276 cb_str += NStr::IntToString( loc.GetStop(eExtreme_Positional)+1 );
277 break;
278 }
279 case CSeq_loc::e_Int: {
280 const CSeq_interval& intv = loc.GetInt();
281 string intv_str;
282 intv_str += NStr::IntToString( intv.GetFrom()+1 );
283 intv_str += "..";
284 intv_str += NStr::IntToString( intv.GetTo()+1 );
285 if ( intv.IsSetStrand() && intv.GetStrand() == eNa_strand_minus ) {
286 intv_str = "complement(" + intv_str + ")";
287 }
288 cb_str += intv_str;
289 break;
290 }
291 }
292 }
293 cb_str += ",aa:";
294
295 string aaName;
296 if (!CWriteUtil::GetAaName(cb, aaName)) {
297 return false;
298 }
299 cb_str += aaName + ")";
300 cbString = cb_str;
301 return true;
302 }
303
304 // ----------------------------------------------------------------------------
GetTrnaCodons(const CTrna_ext & trna,string & codonStr)305 bool CWriteUtil::GetTrnaCodons(
306 const CTrna_ext& trna,
307 string& codonStr )
308 // ----------------------------------------------------------------------------
309 {
310 if (!trna.IsSetCodon()) {
311 return false;
312 }
313 const list<int>& values = trna.GetCodon();
314 if (values.empty()) {
315 return false;
316 }
317 list<int>::const_iterator cit = values.begin();
318 string codons = NStr::IntToString(*cit);
319 for (cit++; cit != values.end(); ++cit) {
320 codons += ",";
321 codons += NStr::IntToString(*cit);
322 }
323 codonStr = codons;
324 return true;
325 }
326
327 // ----------------------------------------------------------------------------
GetGeneRefGene(const CGene_ref & generef,string & gene)328 bool CWriteUtil::GetGeneRefGene(
329 const CGene_ref& generef,
330 string& gene )
331 // ----------------------------------------------------------------------------
332 {
333 #define EMIT(str) { gene = str; return true; }
334 if (generef.IsSetLocus()) {
335 EMIT(generef.GetLocus());
336 }
337 if (generef.IsSetSyn() && generef.GetSyn().size() > 0) {
338 EMIT(generef.GetSyn().front());
339 }
340 if (generef.IsSetDesc()) {
341 EMIT(generef.GetDesc());
342 }
343 return false;
344 #undef EMIT
345 }
346
347 // ----------------------------------------------------------------------------
GetTrnaProductName(const CTrna_ext & trna,string & name)348 bool CWriteUtil::GetTrnaProductName(
349 const CTrna_ext& trna,
350 string& name )
351 // ----------------------------------------------------------------------------
352 {
353 static const string sTrnaList[] = {
354 "tRNA-Gap", "tRNA-Ala", "tRNA-Asx", "tRNA-Cys", "tRNA-Asp", "tRNA-Glu",
355 "tRNA-Phe", "tRNA-Gly", "tRNA-His", "tRNA-Ile", "tRNA-Xle", "tRNA-Lys",
356 "tRNA-Leu", "tRNA-Met", "tRNA-Asn", "tRNA-Pyl", "tRNA-Pro", "tRNA-Gln",
357 "tRNA-Arg", "tRNA-Ser", "tRNA-Thr", "tRNA-Sec", "tRNA-Val", "tRNA-Trp",
358 "tRNA-OTHER", "tRNA-Tyr", "tRNA-Glx", "tRNA-TERM"
359 };
360 static int AACOUNT = sizeof(sTrnaList)/sizeof(string);
361
362 if (!trna.IsSetAa() || !trna.GetAa().IsNcbieaa()) {
363 return false;
364 }
365 int aa = trna.GetAa().GetNcbieaa();
366 (aa == '*') ? (aa = 25) : (aa -= 64);
367 name = ((0 < aa && aa < AACOUNT) ? sTrnaList[aa] : "");
368 return true;
369 }
370
371 // ----------------------------------------------------------------------------
GetTrnaAntiCodon(const CTrna_ext & trna,string & acStr)372 bool CWriteUtil::GetTrnaAntiCodon(
373 const CTrna_ext& trna,
374 string& acStr )
375 // ----------------------------------------------------------------------------
376 {
377 if (!trna.IsSetAnticodon()) {
378 return false;
379 }
380 const CSeq_loc& loc = trna.GetAnticodon();
381 string anticodon;
382 switch( loc.Which() ) {
383 default: {
384 anticodon += NStr::IntToString( loc.GetStart(eExtreme_Positional)+1 );
385 anticodon += "..";
386 anticodon += NStr::IntToString( loc.GetStop(eExtreme_Positional)+1 );
387 break;
388 }
389 case CSeq_loc::e_Int: {
390 const CSeq_interval& intv = loc.GetInt();
391 anticodon += NStr::IntToString( intv.GetFrom()+1 );
392 anticodon += "..";
393 anticodon += NStr::IntToString( intv.GetTo()+1 );
394 if ( intv.IsSetStrand() && intv.GetStrand() == eNa_strand_minus ) {
395 anticodon = "complement(" + anticodon + ")";
396 }
397 break;
398 }
399 }
400 acStr = string("(pos:") + anticodon + ")";
401 return true;
402 }
403
404 // ----------------------------------------------------------------------------
GetDbTag(const CDbtag & dbtag,string & dbTagStr)405 bool CWriteUtil::GetDbTag(
406 const CDbtag& dbtag,
407 string& dbTagStr )
408 //
409 // Note: Different from CDbtag::GetLabel()
410 // ----------------------------------------------------------------------------
411 {
412 string str;
413 if ( dbtag.IsSetDb() ) {
414 str += dbtag.GetDb();
415 }
416 else {
417 str += "NoDB";
418 }
419 if ( dbtag.IsSetTag() ) {
420 if (!str.empty()) {
421 str += ":";
422 }
423 if (dbtag.GetTag().IsId() ) {
424 str += NStr::UIntToString( dbtag.GetTag().GetId() );
425 }
426 if ( dbtag.GetTag().IsStr() ) {
427 str += dbtag.GetTag().GetStr();
428 }
429 }
430 if (str.empty()) {
431 return false;
432 }
433 dbTagStr = str;
434 return true;
435 }
436
437 // ----------------------------------------------------------------------------
GetBiomol(CBioseq_Handle bsh,string & mol_str)438 bool CWriteUtil::GetBiomol(
439 CBioseq_Handle bsh,
440 string& mol_str)
441 // ----------------------------------------------------------------------------
442 {
443 #define EMIT(str) { mol_str = str; return true; }
444 CSeqdesc_CI md(bsh.GetParentEntry(), CSeqdesc::e_Molinfo, 0);
445 if (!md) {
446 return false;
447 }
448 const CMolInfo& molinfo = md->GetMolinfo();
449 if (!molinfo.IsSetBiomol()) {
450 return false;
451 }
452
453 int inst = bsh.GetInst_Mol();
454 int mol = molinfo.GetBiomol();
455
456 switch( mol ) {
457 default:
458 break;
459 case CMolInfo::eBiomol_genomic: {
460 switch (inst) {
461 default:
462 EMIT("genomic");
463 case CSeq_inst::eMol_dna:
464 EMIT("genomic DNA");
465 case CSeq_inst::eMol_rna:
466 EMIT("genomic RNA");
467 }
468 }
469 case CMolInfo::eBiomol_mRNA:
470 EMIT("mRNA");
471 case CMolInfo::eBiomol_rRNA:
472 EMIT("rRNA");
473 case CMolInfo::eBiomol_tRNA:
474 EMIT("tRNA");
475 case CMolInfo::eBiomol_pre_RNA:
476 case CMolInfo::eBiomol_snRNA:
477 case CMolInfo::eBiomol_scRNA:
478 case CMolInfo::eBiomol_snoRNA:
479 case CMolInfo::eBiomol_ncRNA:
480 case CMolInfo::eBiomol_tmRNA:
481 case CMolInfo::eBiomol_transcribed_RNA:
482 EMIT("transcribed RNA");
483 case CMolInfo::eBiomol_other_genetic:
484 case CMolInfo::eBiomol_other: {
485 switch (inst) {
486 default:
487 EMIT("other");
488 case CSeq_inst::eMol_dna:
489 EMIT("other DNA");
490 case CSeq_inst::eMol_rna:
491 EMIT("other RNA");
492 }
493 }
494 case CMolInfo::eBiomol_cRNA:
495 EMIT("viral cRNA");
496
497 case CMolInfo::eBiomol_genomic_mRNA:
498 EMIT("genomic RNA");
499 }
500 switch (inst) {
501 default:
502 EMIT("unassigned");
503 case CSeq_inst::eMol_dna:
504 EMIT("unassigned DNA");
505 case CSeq_inst::eMol_rna:
506 EMIT("unassigned RNA");
507 }
508 return false;
509 #undef EMIT
510 }
511
512 // ----------------------------------------------------------------------------
string CWriteUtil::UrlEncode(
    const string& raw)
//
//  Percent-encode the given string, byte by byte, through a 256 entry lookup
//  table. Passed through unchanged are letters, digits, and the characters
//  ' ', '!', '$', '-', '.', ':', '@', '^' and '_'. Every other byte value is
//  replaced by "%XX", with XX its two digit upper case hex code.
//
//  NOTE(review): the table used to carry a literal "|" in slot 0xFC, so that
//  byte was emitted as a pipe character instead of "%FC". The slot now holds
//  "%FC". If the intent was to pass '|' (0x7C) through unencoded, that would
//  be a separate change to slot 0x7C - confirm with the format owners.
//  ----------------------------------------------------------------------------
{
    static const char s_Table[256][4] = {
        "%00", "%01", "%02", "%03", "%04", "%05", "%06", "%07", "%08", "%09",
        "%0A", "%0B", "%0C", "%0D", "%0E", "%0F", "%10", "%11", "%12", "%13",
        "%14", "%15", "%16", "%17", "%18", "%19", "%1A", "%1B", "%1C", "%1D",
        "%1E", "%1F", " ",   "!",   "%22", "%23", "$",   "%25", "%26", "%27",
        "%28", "%29", "%2A", "%2B", "%2C", "-",   ".",   "%2F", "0",   "1",
        "2",   "3",   "4",   "5",   "6",   "7",   "8",   "9",   ":",   "%3B",
        "%3C", "%3D", "%3E", "%3F", "@",   "A",   "B",   "C",   "D",   "E",
        "F",   "G",   "H",   "I",   "J",   "K",   "L",   "M",   "N",   "O",
        "P",   "Q",   "R",   "S",   "T",   "U",   "V",   "W",   "X",   "Y",
        "Z",   "%5B", "%5C", "%5D", "^",   "_",   "%60", "a",   "b",   "c",
        "d",   "e",   "f",   "g",   "h",   "i",   "j",   "k",   "l",   "m",
        "n",   "o",   "p",   "q",   "r",   "s",   "t",   "u",   "v",   "w",
        "x",   "y",   "z",   "%7B", "%7C", "%7D", "%7E", "%7F", "%80", "%81",
        "%82", "%83", "%84", "%85", "%86", "%87", "%88", "%89", "%8A", "%8B",
        "%8C", "%8D", "%8E", "%8F", "%90", "%91", "%92", "%93", "%94", "%95",
        "%96", "%97", "%98", "%99", "%9A", "%9B", "%9C", "%9D", "%9E", "%9F",
        "%A0", "%A1", "%A2", "%A3", "%A4", "%A5", "%A6", "%A7", "%A8", "%A9",
        "%AA", "%AB", "%AC", "%AD", "%AE", "%AF", "%B0", "%B1", "%B2", "%B3",
        "%B4", "%B5", "%B6", "%B7", "%B8", "%B9", "%BA", "%BB", "%BC", "%BD",
        "%BE", "%BF", "%C0", "%C1", "%C2", "%C3", "%C4", "%C5", "%C6", "%C7",
        "%C8", "%C9", "%CA", "%CB", "%CC", "%CD", "%CE", "%CF", "%D0", "%D1",
        "%D2", "%D3", "%D4", "%D5", "%D6", "%D7", "%D8", "%D9", "%DA", "%DB",
        "%DC", "%DD", "%DE", "%DF", "%E0", "%E1", "%E2", "%E3", "%E4", "%E5",
        "%E6", "%E7", "%E8", "%E9", "%EA", "%EB", "%EC", "%ED", "%EE", "%EF",
        "%F0", "%F1", "%F2", "%F3", "%F4", "%F5", "%F6", "%F7", "%F8", "%F9",
        "%FA", "%FB", "%FC", "%FD", "%FE", "%FF"
    };

    string encoded;
    encoded.reserve(raw.size()); // lower bound; grows if anything is escaped
    for ( size_t i = 0;  i < raw.size();  ++i ) {
        // index through unsigned char so bytes >= 0x80 do not go negative
        encoded += s_Table[static_cast<unsigned char>( raw[i] )];
    }
    return encoded;
}
552
553 // ----------------------------------------------------------------------------
IsLocationOrdered(const CSeq_loc & loc)554 bool CWriteUtil::IsLocationOrdered(
555 const CSeq_loc& loc)
556 // Look whether the given location contains any eNull intervals. If so, the
557 // location is ordered, otherwise not.
558 // ----------------------------------------------------------------------------
559 {
560 switch ( loc.Which() ) {
561 case CSeq_loc::e_Null:
562 return true;
563 case CSeq_loc::e_Mix: {
564 ITERATE (CSeq_loc_mix::Tdata, sub_loc, loc.GetMix().Get()) {
565 if (IsLocationOrdered(**sub_loc)) {
566 return true;
567 }
568 }
569 return false;
570 }
571 default:
572 return false;
573 }
574 }
575
576 // ----------------------------------------------------------------------------
IsSequenceCircular(CBioseq_Handle bsh)577 bool CWriteUtil::IsSequenceCircular(
578 CBioseq_Handle bsh)
579 // ----------------------------------------------------------------------------
580 {
581 if (!bsh || !bsh.IsSetInst_Topology()
582 || bsh.GetInst_Topology() != CSeq_inst::eTopology_circular) {
583 return false;
584 }
585 return true;
586 }
587
588 // ----------------------------------------------------------------------------
NeedsQuoting(const string & str)589 bool CWriteUtil::NeedsQuoting(
590 const string& str )
591 // ----------------------------------------------------------------------------
592 {
593 if(str.empty())
594 return true;
595
596 for (size_t u=0; u < str.length(); ++u) {
597 if (str[u] == '\"')
598 return false;
599 if (str[u] == ' ' || str[u] == ';' || str[u] == ':' || str[u] == '=') {
600 return true;
601 }
602 }
603 return false;
604 }
605
606 // ----------------------------------------------------------------------------
ChangeToPackedInt(CSeq_loc & loc)607 void CWriteUtil::ChangeToPackedInt(
608 CSeq_loc& loc)
609 // Special mission:
610 // Filter out eNull intervals before submitting the location to the "normal"
611 // ChangeToPackedInt() method.
612 // ----------------------------------------------------------------------------
613 {
614 switch ( loc.Which() ) {
615 case CSeq_loc::e_Null:
616 loc.SetPacked_int();
617 return;
618 case CSeq_loc::e_Mix: {
619 vector<CRef<CSeq_loc> > sub_locs;
620 sub_locs.reserve(loc.GetMix().Get().size());
621 ITERATE (CSeq_loc_mix::Tdata, orig_sub_loc, loc.GetMix().Get()) {
622 if ((*orig_sub_loc)->Which() == CSeq_loc::e_Null) {
623 continue;
624 }
625 CRef<CSeq_loc> new_sub_loc(new CSeq_loc);
626 new_sub_loc->Assign(**orig_sub_loc);
627 ChangeToPackedInt(*new_sub_loc);
628 sub_locs.push_back(new_sub_loc);
629 }
630 loc.SetPacked_int(); // in case there are zero intervals
631 ITERATE (vector<CRef<CSeq_loc> >, sub_loc, sub_locs) {
632 copy((*sub_loc)->GetPacked_int().Get().begin(),
633 (*sub_loc)->GetPacked_int().Get().end(),
634 back_inserter(loc.SetPacked_int().Set()));
635 }
636 }
637 return;
638 default:
639 loc.ChangeToPackedInt();
640 return;
641 }
642 }
643
644 // ----------------------------------------------------------------------------
GetBestId(CSeq_id_Handle idh,CScope & scope,string & best_id)645 bool CWriteUtil::GetBestId(
646 CSeq_id_Handle idh,
647 CScope& scope,
648 string& best_id)
649 // ----------------------------------------------------------------------------
650 {
651 return CGenbankIdResolve::Get().GetBestId(idh, scope, best_id);
652 }
653
654 // ----------------------------------------------------------------------------
GetBestId(const CMappedFeat & mf,string & best_id)655 bool CWriteUtil::GetBestId(
656 const CMappedFeat& mf,
657 string& best_id)
658 // ----------------------------------------------------------------------------
659 {
660 return CGenbankIdResolve::Get().GetBestId(mf, best_id);
661 }
662
663 // ----------------------------------------------------------------------------
GetQualifier(CMappedFeat mf,const string & key,string & value)664 bool CWriteUtil::GetQualifier(
665 CMappedFeat mf,
666 const string& key,
667 string& value)
668 // ----------------------------------------------------------------------------
669 {
670 if (!mf.IsSetQual()) {
671 return false;
672 }
673 const vector<CRef<CGb_qual> >& quals = mf.GetQual();
674 vector<CRef<CGb_qual> >::const_iterator it = quals.begin();
675 for (; it != quals.end(); ++it) {
676 if (!(*it)->CanGetQual() || !(*it)->CanGetVal()) {
677 continue;
678 }
679 if ((*it)->GetQual() == key) {
680 value = (*it)->GetVal();
681 return true;
682 }
683 }
684 return false;
685 }
686
687 // ---------------------------------------------------------------------------
xAssignSequenceIsGenomicRecord()688 void CGffFeatureContext::xAssignSequenceIsGenomicRecord()
689 // ---------------------------------------------------------------------------
690 {
691 m_bSequenceIsGenomicRecord = false;
692 if (!m_bsh) {
693 return;
694 }
695 if (!m_bsh || !m_bsh.IsSetDescr()) {
696 return;
697 }
698 const CSeq_descr& descr = m_bsh.GetDescr();
699 if (!descr.CanGet()) {
700 return;
701 }
702 const list< CRef< CSeqdesc > >& listDescr = descr.Get();
703 for (list< CRef< CSeqdesc > >::const_iterator cit = listDescr.begin();
704 cit != listDescr.end(); ++cit) {
705 const CSeqdesc& desc = **cit;
706 if (!desc.IsMolinfo()) {
707 continue;
708 }
709 const CMolInfo& molInfo = desc.GetMolinfo();
710 if (!molInfo.IsSetBiomol()) {
711 continue;
712 }
713 CMolInfo::TBiomol bioMol = molInfo.GetBiomol();
714 m_bSequenceIsGenomicRecord = (
715 (bioMol == CMolInfo::eBiomol_genomic) ||
716 (bioMol == CMolInfo::eBiomol_cRNA));
717 return;
718 }
719 return;
720 }
721
722 // ---------------------------------------------------------------------------
xAssignSequenceHasBioSource()723 void CGffFeatureContext::xAssignSequenceHasBioSource()
724 // ---------------------------------------------------------------------------
725 {
726 m_bSequenceHasBioSource = false;
727 if (!m_bsh) {
728 return;
729 }
730 if (m_bsh.IsSetDescr()) {
731 const CSeq_descr& descr = m_bsh.GetDescr();
732 if (descr.CanGet()) {
733 const list< CRef< CSeqdesc > >& listDescr = descr.Get();
734 for (list< CRef< CSeqdesc > >::const_iterator cit = listDescr.begin();
735 cit != listDescr.end(); ++cit) {
736 const CSeqdesc& desc = **cit;
737 if (desc.IsSource()) {
738 m_bSequenceHasBioSource = true;
739 return;
740 }
741 }
742 }
743 }
744 CBioseq_set_Handle setH;
745 setH = m_bsh.GetParentBioseq_set();
746 if (setH && setH.IsSetDescr()) {
747 const CSeq_descr& descr = setH.GetDescr();
748 if (descr.CanGet()) {
749 const list< CRef< CSeqdesc > >& listDescr = descr.Get();
750 for (list< CRef< CSeqdesc > >::const_iterator cit = listDescr.begin();
751 cit != listDescr.end(); ++cit) {
752 const CSeqdesc& desc = **cit;
753 if (desc.IsSource()) {
754 m_bSequenceHasBioSource = true;
755 return;
756 }
757 }
758 }
759 }
760 return;
761 }
762
763 // ----------------------------------------------------------------------------
CMappedFeat CGffFeatureContext::FindBestGeneParent(const CMappedFeat& mf)
//
//  Find the gene feature that best serves as parent of the given feature.
//  The most recent query and its answer are remembered in m_mfLastIn and
//  m_mfLastOut, so asking for the same feature again returns the remembered
//  answer. Mobile elements get an empty feature, mRNAs are resolved through
//  GetBestGeneForMrna(), everything else through GetBestGeneForFeat().
//  ----------------------------------------------------------------------------
{
    if (mf == m_mfLastIn) {
        return m_mfLastOut;
    }
    m_mfLastIn = mf;

    switch (mf.GetFeatSubtype()) {
    case CSeqFeatData::eSubtype_mobile_element:
        m_mfLastOut = CMappedFeat();
        break;
    case CSeqFeatData::eSubtype_mRNA:
        m_mfLastOut = feature::GetBestGeneForMrna(mf, &m_ft);
        break;
    default:
        m_mfLastOut = feature::GetBestGeneForFeat(mf, &m_ft);
        break;
    }
    return m_mfLastOut;
}
786
787 // ----------------------------------------------------------------------------
GetUserObjectByType(const CUser_object & uo,const string & strType)788 CConstRef<CUser_object> CWriteUtil::GetUserObjectByType(
789 const CUser_object& uo,
790 const string& strType)
791 // ----------------------------------------------------------------------------
792 {
793 if (uo.IsSetType() && uo.GetType().IsStr() &&
794 uo.GetType().GetStr() == strType) {
795 return CConstRef<CUser_object>(&uo);
796 }
797 const CUser_object::TData& fields = uo.GetData();
798 for (CUser_object::TData::const_iterator it = fields.begin();
799 it != fields.end();
800 ++it) {
801 const CUser_field& field = **it;
802 if (field.IsSetData()) {
803 const CUser_field::TData& data = field.GetData();
804 if (data.Which() == CUser_field::TData::e_Object) {
805 CConstRef<CUser_object> recur = CWriteUtil::GetUserObjectByType(
806 data.GetObject(), strType);
807 if (recur) {
808 return recur;
809 }
810 }
811 }
812 }
813 return CConstRef<CUser_object>();
814 }
815
816 // ----------------------------------------------------------------------------
GetUserObjectByType(const list<CRef<CUser_object>> & uos,const string & strType)817 CConstRef<CUser_object> CWriteUtil::GetUserObjectByType(
818 const list<CRef<CUser_object > >& uos,
819 const string& strType)
820 // ----------------------------------------------------------------------------
821 {
822 CConstRef<CUser_object> pResult;
823 typedef list<CRef<CUser_object > >::const_iterator CIT;
824 for (CIT cit = uos.begin(); cit != uos.end(); ++cit) {
825 const CUser_object& uo = **cit;
826 pResult = CWriteUtil::GetUserObjectByType(uo, strType);
827 if (pResult) {
828 return pResult;
829 }
830 }
831 return CConstRef<CUser_object>();
832 }
833
834 // ----------------------------------------------------------------------------
GetModelEvidence(CMappedFeat mf)835 CConstRef<CUser_object> CWriteUtil::GetModelEvidence(
836 CMappedFeat mf)
837 // ----------------------------------------------------------------------------
838 {
839 CConstRef<CUser_object> me;
840 if (mf.IsSetExt()) {
841 me = CWriteUtil::GetUserObjectByType(mf.GetExt(), "ModelEvidence");
842 }
843 if (!me && mf.IsSetExts()) {
844 me = CWriteUtil::GetUserObjectByType(mf.GetExts(), "ModelEvidence");
845 }
846 return me;
847 }
848
849 // -----------------------------------------------------------------------------
850 size_t
s_CountAccessions(const CUser_field & field)851 s_CountAccessions(
852 const CUser_field& field)
853 // -----------------------------------------------------------------------------
854 {
855 size_t count = 0;
856 if (!field.IsSetData() || !field.GetData().IsFields()) {
857 return 0;
858 }
859
860 //
861 // Each accession consists of yet another block of "Fields" one of which carries
862 // a label named "accession":
863 //
864 ITERATE(CUser_field::TData::TFields, it, field.GetData().GetFields()) {
865 const CUser_field& uf = **it;
866 if (uf.CanGetData() && uf.GetData().IsFields()) {
867
868 ITERATE(CUser_field::TData::TFields, it2, uf.GetData().GetFields()) {
869 const CUser_field& inner = **it2;
870 if (inner.IsSetLabel() && inner.GetLabel().IsStr()) {
871 if (inner.GetLabel().GetStr() == "accession") {
872 ++count;
873 }
874 }
875 }
876 }
877 }
878 return count;
879 }
880
881
882 // ----------------------------------------------------------------------------
GetStringForModelEvidence(CMappedFeat mf,string & mestr)883 bool CWriteUtil::GetStringForModelEvidence(
884 CMappedFeat mf,
885 string& mestr)
886 // ----------------------------------------------------------------------------
887 {
888 CConstRef<CUser_object> me = CWriteUtil::GetModelEvidence(mf);
889 if (!me) {
890 return false;
891 }
892
893 size_t numRna(0), numEst(0), numProtein(0), numLongSra(0),
894 rnaseqBaseCoverage(0), rnaseqBiosamplesIntronsFull(0);
895 string method;
896 const CUser_object::TData& fields = me->GetData();
897 ITERATE(CUser_object::TData, it, fields) {
898 const CUser_field& field = **it;
899 if (!field.IsSetLabel() || !field.GetLabel().IsStr()) {
900 continue;
901 }
902 if (!field.IsSetData()) {
903 continue;
904 }
905 const string& label = field.GetLabel().GetStr();
906 if (label == "Method") {
907 method = field.GetData().GetStr();
908 continue;
909 }
910 if (label == "Counts") {
911 ITERATE(CUser_field::TData::TFields, inner, field.GetData().GetFields()) {
912 const CUser_field& field = **inner;
913 if (!field.IsSetLabel() || !field.GetLabel().IsStr()) {
914 continue;
915 }
916 if (!field.IsSetData()) {
917 continue;
918 }
919 const string& label = field.GetLabel().GetStr();
920 if (label == "mRNA") {
921 numRna = field.GetData().GetInt();
922 continue;
923 }
924 if (label == "EST") {
925 numEst = field.GetData().GetInt();
926 continue;
927 }
928 if (label == "Protein") {
929 numProtein = field.GetData().GetInt();
930 continue;
931 }
932 if (label == "long SRA read") {
933 numLongSra = field.GetData().GetInt();
934 continue;
935 }
936 }
937 }
938 if (label == "mRNA") {
939 numRna = s_CountAccessions(field);
940 continue;
941 }
942 if (label == "EST") {
943 numEst = s_CountAccessions(field);
944 continue;
945 }
946 if (label == "Protein") {
947 numProtein = s_CountAccessions(field);
948 continue;
949 }
950 if (label == "long SRA read") {
951 numLongSra = s_CountAccessions(field);
952 continue;
953 }
954 if (label == "rnaseq_base_coverage") {
955 if (field.CanGetData() && field.GetData().IsInt()) {
956 rnaseqBaseCoverage = field.GetData().GetInt();
957 }
958 continue;
959 }
960 if (label == "rnaseq_biosamples_introns_full") {
961 if (field.CanGetData() && field.GetData().IsInt()) {
962 rnaseqBiosamplesIntronsFull = field.GetData().GetInt();
963 }
964 continue;
965 }
966 }
967
968 //CSeqFeatData::ESubtype st = mf.GetFeatSubtype();
969 CNcbiOstrstream text;
970 //text << "Derived by automated computational analysis";
971 //if (!NStr::IsBlank(method)) {
972 // text << " using gene prediction method: " << method;
973 //}
974 //text << ".";
975
976 if (numRna > 0 || numEst > 0 || numProtein > 0 || numLongSra > 0 ||
977 rnaseqBaseCoverage > 0)
978 {
979 text << "Supporting evidence includes similarity to:";
980 }
981 string section_prefix = " ";
982 // The countable section
983 if (numRna > 0 || numEst > 0 || numProtein > 0 || numLongSra > 0)
984 {
985 text << section_prefix;
986 string prefix;
987 if (numRna > 0) {
988 text << prefix << numRna << " mRNA";
989 if (numRna > 1) {
990 text << 's';
991 }
992 prefix = ", ";
993 }
994 if (numEst > 0) {
995 text << prefix << numEst << " EST";
996 if (numEst > 1) {
997 text << 's';
998 }
999 prefix = ", ";
1000 }
1001 if (numProtein > 0) {
1002 text << prefix << numProtein << " Protein";
1003 if (numProtein > 1) {
1004 text << 's';
1005 }
1006 prefix = ", ";
1007 }
1008 if (numLongSra > 0) {
1009 text << prefix << numLongSra << " long SRA read";
1010 if (numLongSra > 1) {
1011 text << 's';
1012 }
1013 prefix = ", ";
1014 }
1015 section_prefix = ", and ";
1016 }
1017 // The RNASeq section
1018 if (rnaseqBaseCoverage > 0)
1019 {
1020 text << section_prefix;
1021
1022 text << rnaseqBaseCoverage << "% coverage of the annotated genomic feature by RNAseq alignments";
1023 if (rnaseqBiosamplesIntronsFull > 0) {
1024 text << ", including " << rnaseqBiosamplesIntronsFull;
1025 text << " sample";
1026 if (rnaseqBiosamplesIntronsFull > 1) {
1027 text << 's';
1028 }
1029 text << " with support for all annotated introns";
1030 }
1031
1032 section_prefix = ", and ";
1033 }
1034 mestr = CNcbiOstrstreamToString(text);
1035 return true;
1036 }
1037
1038
1039 // ----------------------------------------------------------------------------
GetThreeFeatType(const CSeq_feat & feat,string & threeFeatType)1040 bool CWriteUtil::GetThreeFeatType(
1041 const CSeq_feat& feat,
1042 string& threeFeatType)
1043 // ----------------------------------------------------------------------------
1044 {
1045 if (!feat.IsSetExts()) {
1046 return false;
1047 }
1048 auto pUo = CWriteUtil::GetUserObjectByType(feat.GetExts(), "BED");
1049 if (!pUo || !pUo->HasField("location")) {
1050 return false;
1051 }
1052 threeFeatType = pUo->GetField("location").GetString();
1053 return true;
1054 }
1055
1056
1057 // ----------------------------------------------------------------------------
GetThreeFeatScore(const CSeq_feat & feat,int & score)1058 bool CWriteUtil::GetThreeFeatScore(
1059 const CSeq_feat& feat,
1060 int& score)
1061 // ----------------------------------------------------------------------------
1062 {
1063 if (!feat.IsSetExts()) {
1064 return false;
1065 }
1066 auto pUo = CWriteUtil::GetUserObjectByType(feat.GetExts(), "DisplaySettings");
1067 if (!pUo || !pUo->HasField("score")) {
1068 return false;
1069 }
1070 score = pUo->GetField("score").GetInt();
1071 return true;
1072 }
1073
1074
1075 // ----------------------------------------------------------------------------
GetThreeFeatRgb(const CSeq_feat & feat,string & color)1076 bool CWriteUtil::GetThreeFeatRgb(
1077 const CSeq_feat& feat,
1078 string& color)
1079 // ----------------------------------------------------------------------------
1080 {
1081 if (!feat.IsSetExts()) {
1082 return false;
1083 }
1084 auto pUo = CWriteUtil::GetUserObjectByType(feat.GetExts(), "DisplaySettings");
1085 if (!pUo || !pUo->HasField("color")) {
1086 return false;
1087 }
1088 color = pUo->GetField("color").GetString();
1089 return true;
1090 }
1091
1092
1093 // ----------------------------------------------------------------------------
IsThreeFeatFormat(const CSeq_annot & annot)1094 bool CWriteUtil::IsThreeFeatFormat(
1095 const CSeq_annot& annot)
1096 // ----------------------------------------------------------------------------
1097 {
1098 using FTABLE = list<CRef<CSeq_feat> >;
1099
1100 if (!annot.IsFtable()) {
1101 return false;
1102 }
1103 const FTABLE& ftable = annot.GetData().GetFtable();
1104 auto remainingTests = 100;
1105 for (auto pFeat: ftable) {
1106 string dummy;
1107 if (!CWriteUtil::GetThreeFeatType(*pFeat, dummy)) {
1108 return false;
1109 }
1110 if (--remainingTests == 0) {
1111 break;
1112 }
1113 }
1114 return true;
1115 }
1116
1117
1118 // ----------------------------------------------------------------------------
GetStringForGoMarkup(const vector<CRef<CUser_field>> & fields,string & goMarkup,bool relaxed)1119 bool CWriteUtil::GetStringForGoMarkup(
1120 const vector<CRef<CUser_field > >& fields,
1121 string& goMarkup,
1122 bool relaxed)
1123 // ----------------------------------------------------------------------------
1124 {
1125 vector<string> strings;
1126 if (! CWriteUtil::GetStringsForGoMarkup(fields, strings, relaxed)) {
1127 return false;
1128 }
1129 goMarkup = NStr::Join(strings, ",");
1130 return true;
1131 }
1132
1133 // ----------------------------------------------------------------------------
GetStringsForGoMarkup(const vector<CRef<CUser_field>> & fields,vector<string> & goMarkup,bool relaxed)1134 bool CWriteUtil::GetStringsForGoMarkup(
1135 const vector<CRef<CUser_field > >& fields,
1136 vector<string>& goMarkup,
1137 bool relaxed)
1138 // ----------------------------------------------------------------------------
1139 {
1140 goMarkup.clear();
1141 for (const auto& field: fields) {
1142 if (!field->IsSetLabel() || !field->GetLabel().IsId()
1143 || ( field->GetLabel().GetId() != 0 && ! relaxed)) {
1144 continue;
1145 }
1146 if (!field->IsSetData() || !field->GetData().IsFields()) {
1147 continue;
1148 }
1149 string descriptive, goId, pubmedId, evidence;
1150 const auto& subFields = field->GetData().GetFields();
1151 for (const auto& subField: subFields) {
1152 if (!subField->IsSetLabel() || ! subField->GetLabel().IsStr()) {
1153 continue;
1154 }
1155 const auto& subLabel = subField->GetLabel().GetStr();
1156 if (subLabel == "text string") {
1157 descriptive = subField->GetData().GetStr();
1158 continue;
1159 }
1160 if (subLabel == "go id") {
1161 goId = subField->GetData().GetStr();
1162 continue;
1163 }
1164 if (subLabel == "pubmed id") {
1165 pubmedId = NStr::IntToString(subField->GetData().GetInt());
1166 continue;
1167 }
1168 if (subLabel == "evidence") {
1169 evidence = subField->GetData().GetStr();
1170 continue;
1171 }
1172 }
1173 goMarkup.push_back(descriptive + "|" + goId + "|" + pubmedId + "|" + evidence);
1174 }
1175 return true;
1176 }
1177
1178 // ----------------------------------------------------------------------------
GetListOfGoIds(const vector<CRef<CUser_field>> & fields,list<std::string> & goIds,bool relaxed)1179 bool CWriteUtil::GetListOfGoIds(
1180 const vector<CRef<CUser_field > >& fields,
1181 list<std::string>& goIds,
1182 bool relaxed)
1183 // ----------------------------------------------------------------------------
1184 {
1185 for (const auto& field: fields) {
1186 if (!field->IsSetLabel() || !field->GetLabel().IsId()
1187 || ( field->GetLabel().GetId() != 0 && ! relaxed)) {
1188 continue;
1189 }
1190 if (!field->IsSetData() || !field->GetData().IsFields()) {
1191 continue;
1192 }
1193 string descriptive, goId, pubmedId, evidence;
1194 const auto& subFields = field->GetData().GetFields();
1195 for (const auto& subField: subFields) {
1196 if (!subField->IsSetLabel() || ! subField->GetLabel().IsStr()) {
1197 continue;
1198 }
1199 const auto& subLabel = subField->GetLabel().GetStr();
1200 if (subLabel == "go id") {
1201 goId = subField->GetData().GetStr();
1202 goIds.push_back(string("GO:")+goId);
1203 continue;
1204 }
1205 }
1206 }
1207 return true;
1208 }
1209
1210 // ----------------------------------------------------------------------------
CompareLocations(const CMappedFeat & lhs,const CMappedFeat & rhs)1211 bool CWriteUtil::CompareLocations(
1212 const CMappedFeat& lhs,
1213 const CMappedFeat& rhs)
1214 // ----------------------------------------------------------------------------
1215 {
1216 const CSeq_loc& lhl = lhs.GetLocation();
1217 const CSeq_loc& rhl = rhs.GetLocation();
1218
1219 //test1: id, alphabetical
1220 string lhs_id = CWriteUtil::GetStringId(lhl);
1221 string rhs_id = CWriteUtil::GetStringId(rhl);
1222 if (lhs_id != rhs_id) {
1223 return (lhs_id < rhs_id);
1224 }
1225
1226 //test2: loc-start ascending
1227 size_t lhs_start = lhl.GetStart(ESeqLocExtremes::eExtreme_Positional);
1228 size_t rhs_start = rhl.GetStart(ESeqLocExtremes::eExtreme_Positional);
1229 if (lhs_start != rhs_start) {
1230 return (lhs_start < rhs_start);
1231 }
1232 //test3: loc-stop decending
1233 size_t lhs_stop = lhl.GetStop(ESeqLocExtremes::eExtreme_Positional);
1234 size_t rhs_stop = rhl.GetStop(ESeqLocExtremes::eExtreme_Positional);
1235 return (lhs_stop > rhs_stop);
1236 }
1237
1238
1239 // ----------------------------------------------------------------------------
GetStringId(const CSeq_loc & loc)1240 string CWriteUtil::GetStringId(
1241 const CSeq_loc& loc)
1242 // ----------------------------------------------------------------------------
1243 {
1244 if (loc.GetId()) {
1245 return loc.GetId()->AsFastaString();
1246 }
1247 return "";
1248 }
1249
1250 // ----------------------------------------------------------------------------
IsNucleotideSequence(CBioseq_Handle bsh)1251 bool CWriteUtil::IsNucleotideSequence(CBioseq_Handle bsh)
1252 // ----------------------------------------------------------------------------
1253 {
1254 if (bsh.CanGetInst_Mol()) {
1255 const auto& mol = bsh.GetBioseqMolType();
1256 switch (mol) {
1257 default:
1258 break;
1259 case CSeq_inst::eMol_dna:
1260 case CSeq_inst::eMol_na:
1261 case CSeq_inst::eMol_rna:
1262 return true;
1263 case CSeq_inst::eMol_aa:
1264 return false;
1265 }
1266 }
1267 if (bsh.CanGetDescr()) {
1268 const auto& descrs = bsh.GetDescr().Get();
1269 for (const auto& pDescr: descrs) {
1270 if (pDescr->IsMolinfo() && pDescr->GetMolinfo().CanGetBiomol()) {
1271 switch(pDescr->GetMolinfo().GetBiomol()) {
1272 case CMolInfo::eBiomol_unknown:
1273 case CMolInfo::eBiomol_other:
1274 break;
1275 case CMolInfo::eBiomol_peptide:
1276 return false;
1277 default:
1278 return true;
1279 }
1280 }
1281 }
1282 }
1283 return false;
1284 }
1285
1286
1287 // ----------------------------------------------------------------------------
IsProteinSequence(CBioseq_Handle bsh)1288 bool CWriteUtil::IsProteinSequence(CBioseq_Handle bsh)
1289 // ----------------------------------------------------------------------------
1290 {
1291 if (bsh.CanGetInst_Mol()) {
1292 const auto& mol = bsh.GetBioseqMolType();
1293 switch (mol) {
1294 default:
1295 break;
1296 case CSeq_inst::eMol_dna:
1297 case CSeq_inst::eMol_na:
1298 case CSeq_inst::eMol_rna:
1299 return false;
1300 case CSeq_inst::eMol_aa:
1301 return true;
1302 }
1303 }
1304 if (bsh.CanGetDescr()) {
1305 const auto& descrs = bsh.GetDescr().Get();
1306 for (const auto& pDescr: descrs) {
1307 if (pDescr->IsMolinfo() && pDescr->GetMolinfo().CanGetBiomol()) {
1308 switch(pDescr->GetMolinfo().GetBiomol()) {
1309 case CMolInfo::eBiomol_unknown:
1310 case CMolInfo::eBiomol_other:
1311 break;
1312 case CMolInfo::eBiomol_peptide:
1313 return true;
1314 default:
1315 return false;
1316 }
1317 }
1318 }
1319 }
1320 return false;
1321 }
1322
1323 // ----------------------------------------------------------------------------
IsTransspliced(const CSeq_feat & feature)1324 bool CWriteUtil::IsTransspliced(const CSeq_feat& feature)
1325 // ----------------------------------------------------------------------------
1326 {
1327 return (feature.IsSetExcept_text() && feature.GetExcept_text() == "trans-splicing");
1328 }
1329
1330
1331 // ----------------------------------------------------------------------------
IsTransspliced(const CMappedFeat & mf)1332 bool CWriteUtil::IsTransspliced(const CMappedFeat& mf)
1333 // ----------------------------------------------------------------------------
1334 {
1335 return CWriteUtil::IsTransspliced(mf.GetMappedFeature());
1336 //return (mf.IsSetExcept_text() && mf.GetExcept_text() == "trans-splicing");
1337 }
1338
1339
1340 // ----------------------------------------------------------------------------
GetTranssplicedEndpoints(const CSeq_loc & loc,unsigned int & inPoint,unsigned int & outPoint)1341 bool CWriteUtil::GetTranssplicedEndpoints(
1342 // ----------------------------------------------------------------------------
1343 const CSeq_loc& loc,
1344 unsigned int& inPoint,
1345 unsigned int& outPoint)
1346 // start determined by the minimum start of any sub interval
1347 // stop determined by the maximum stop of any sub interval
1348 // ----------------------------------------------------------------------------
1349 {
1350 typedef list<CRef<CSeq_interval> >::const_iterator CIT;
1351
1352 CSeq_loc testLoc;
1353 testLoc.Assign(loc);
1354 if (testLoc.IsMix()) {
1355 testLoc.ChangeToPackedInt();
1356 }
1357 if (!testLoc.IsPacked_int()) {
1358 return false;
1359 }
1360 const CPacked_seqint& packedInt = testLoc.GetPacked_int();
1361 inPoint = packedInt.GetStart(eExtreme_Biological);
1362 outPoint = packedInt.GetStop(eExtreme_Biological);
1363 const list<CRef<CSeq_interval> >& intvs = packedInt.Get();
1364 for (CIT cit = intvs.begin(); cit != intvs.end(); cit++) {
1365 const CSeq_interval& intv = **cit;
1366 if (intv.GetFrom() < inPoint) {
1367 inPoint = intv.GetFrom();
1368 }
1369 if (intv.GetTo() > outPoint) {
1370 outPoint = intv.GetTo();
1371 }
1372 }
1373 return true;
1374 }
1375
1376 // ----------------------------------------------------------------------------
GetEffectiveStrand(const CSeq_interval & interval)1377 ENa_strand CWriteUtil::GetEffectiveStrand(
1378 const CSeq_interval& interval)
1379 // ----------------------------------------------------------------------------
1380 {
1381 // if it's not explicitely minus, then it's plus
1382 // (not true for other location types)
1383 return (interval.IsSetStrand() && interval.GetStrand() == eNa_strand_minus)
1384 ?
1385 eNa_strand_minus :
1386 eNa_strand_plus;
1387 }
1388
1389
1390
1391 END_NCBI_SCOPE
1392