1 /*  $Id: seqannot_splicer_util.cpp 607723 2020-05-06 18:48:03Z ivanov $
2 * ===========================================================================
3 *
4 *                            PUBLIC DOMAIN NOTICE
5 *               National Center for Biotechnology Information
6 *
7 *  This software/database is a "United States Government Work" under the
8 *  terms of the United States Copyright Act.  It was written as part of
9 *  the author's official duties as a United States Government employee and
10 *  thus cannot be copyrighted.  This software/database is freely available
11 *  to the public for use. The National Library of Medicine and the U.S.
12 *  Government have not placed any restriction on its use or reproduction.
13 *
14 *  Although all reasonable efforts have been taken to ensure the accuracy
15 *  and reliability of the software and data, the NLM and the U.S.
16 *  Government do not and cannot warrant the performance or results that
17 *  may be obtained by using this software or data. The NLM and the U.S.
18 *  Government disclaim all warranties, express or implied, including
19 *  warranties of performance, merchantability or fitness for any particular
20 *  purpose.
21 *
22 *  Please cite the author in any work or product based on this material.
23 *
24 * ===========================================================================
25 *
26 * Author:  David McElhany
27 *
28 * File Description:
29 *   See main application file.
30 *
31 * ===========================================================================
32 */
33 
34 #include <ncbi_pch.hpp>
35 #include <stack>
36 
37 #include <corelib/ncbiexpt.hpp>
38 #include <corelib/ncbimisc.hpp>
39 #include <corelib/ncbiobj.hpp>
40 
41 #include <objects/seq/Seq_annot.hpp>
42 #include <objects/seqloc/Seq_id.hpp>
43 
44 #include <serial/objectio.hpp>
45 #include <serial/objistr.hpp>
46 #include <serial/serialdef.hpp>
47 
48 #include "seqannot_splicer_stats.hpp"
49 #include "seqannot_splicer_util.hpp"
50 
51 USING_SCOPE(ncbi);
52 USING_SCOPE(objects);
53 
54 
55 ///////////////////////////////////////////////////////////////////////////
56 // Typedefs
57 
58 // TSeqRefCont _could_ be different than TIds, so give it its own type.
59 typedef list<TSeqRef>                       TSeqRefCont;
60 typedef list<CNcbiStreampos>                TPosCont;
61 
62 typedef CObjectFor<TSeqRefCont>             TSeqRefContOb;
63 typedef CObjectFor<TPosCont>                TPosContOb;
64 
65 typedef CRef<TSeqRefContOb>                 TSeqRefContRef;
66 typedef CRef<TPosContOb>                    TPosContRef;
67 
68 typedef set<TSeqRef,PPtrLess<TSeqRef> >     TContextSeqIds;
69 typedef struct SContext_tag*                TContextPtr;
70 typedef list<TContextPtr>                   TContextList;
71 typedef stack<TContextPtr>                  TContextPtrStack;
72 
73 typedef map<CNcbiStreampos,TSeqRefContRef>              TAnnotToSeqIdMap;
74 typedef map<TSeqRef,TPosContRef,PPtrLess<TSeqRef> >     TSeqIdToAnnotMap;
75 
76 typedef TAnnotToSeqIdMap::const_iterator    TAnnotToSeqIdMapCIter;
77 typedef TAnnotToSeqIdMap::iterator          TAnnotToSeqIdMapIter;
78 typedef TContextList::iterator              TContextListIter;
79 typedef TPosCont::iterator                  TPosContIter;
80 typedef TSeqIdToAnnotMap::const_iterator    TSeqIdToAnnotMapCIter;
81 typedef TSeqIdToAnnotMap::iterator          TSeqIdToAnnotMapIter;
82 typedef TSeqRefCont::iterator               TSeqRefContIter;
83 
84 typedef struct SContext_tag {
85     EContextType    type;
86     bool            has_annots;
87     TContextSeqIds  seqids;
88     TContextList    sub_contexts;
89 } SContext;
90 
91 
92 ///////////////////////////////////////////////////////////////////////////
93 // Module static functions and objects
94 
95 static void s_DeleteContext(TContextPtr context);
96 
97 // The following function could be used for debugging.
98 #if 0
99 static void s_DumpContext(TContextPtr context);
100 #endif
101 
102 static TSeqAnnotChoiceMaskFlags s_GetSeqAnnotChoiceMask(const CSeq_annot* annot);
103 
104 static TSeqIdChoiceMaskFlags s_GetSeqIdChoiceMask(const CSeq_id* seqid);
105 
106 static bool s_RemoveAnnot(TAnnotToSeqIdMapIter annot_in_map_iter);
107 
108 static bool s_SpliceAnnot(unique_ptr<CObjectIStream>& sai,
109                           COStreamContainer& osc,
110                           TAnnotToSeqIdMapIter annot_in_map);
111 
112 static void s_SpliceAnnotsForSeqId(unique_ptr<CObjectIStream>& sai,
113                                    COStreamContainer& osc,
114                                    TSeqIdToAnnotMapIter seqid_iter);
115 
116 
117 // These static global objects defined here for a test simplicity.
118 // DO NOT USE static non-POD types in your program!!!
119 
120 static TAnnotToSeqIdMap             s_AnnotToSeqIdMap;
121 static TSeqIdToAnnotMap             s_SeqIdToAnnotMap;
122 
123 static TContextPtr                  s_CurrentContextPtr = NULL;
124 static TContextPtr                  s_RootContextPtr = NULL;
125 static TContextList                 s_ContextSequence;
126 static TContextListIter             s_ContextSequenceIter;
127 static TContextPtrStack             s_ContextPtrStack;
128 
129 static TSeqAnnotChoiceMaskFlags     s_SeqAnnotChoiceMask = fSAMF_Default;
130 static TSeqIdChoiceMaskFlags        s_SeqIdChoiceMask = fSIMF_Default;
131 
132 static CNcbiStreampos               s_AnnotPos;
133 
134 
135 ///////////////////////////////////////////////////////////////////////////
136 // Program utility functions
137 
138 // This function is called during preprocessing of the Seq-entry and adds
139 // a given Seq-id to the current context.
AddSeqIdToCurrentContext(TSeqRef id)140 void AddSeqIdToCurrentContext(TSeqRef id)
141 {
142     // Only add selected Seq-id CHOICE types.
143     if (IsSeqIdChoiceSelected(id)) {
144         s_CurrentContextPtr->seqids.insert(id);
145     }
146 }
147 
148 
149 // This function is called during preprocessing of the Seq-entry, at the end
150 // of the Bioseq or Bioseq-set, and helps create the context tree.
ContextEnd(void)151 void ContextEnd(void)
152 {
153     s_ContextPtrStack.pop();
154     if (s_ContextPtrStack.empty()) {
155         s_CurrentContextPtr = NULL;
156     } else {
157         s_CurrentContextPtr = s_ContextPtrStack.top();
158     }
159 }
160 
161 // This function is called during preprocessing of the Seq-entry, at the start
162 // of the Bioseq or Bioseq-set, and helps create the context tree.
ContextStart(CObjectIStream & in,EContextType type)163 void ContextStart(CObjectIStream& in, EContextType type)
164 {
165     // Create and populate new context.
166     TContextPtr context = new SContext;
167     context->type = type;
168     context->has_annots = false;
169     context->seqids.clear();
170     context->sub_contexts.clear();
171 
172     // Insert the new context into the full context tree.
173     if (NULL == s_RootContextPtr) {
174         s_RootContextPtr = context;
175     } else {
176         s_CurrentContextPtr->sub_contexts.push_back(context);
177     }
178 
179     // Use a stack to track the history of current context pointers.
180     s_ContextPtrStack.push(context);
181 
182     // Keep a full chronological sequence of context pointers for use when
183     // copying.
184     s_ContextSequence.push_back(context);
185 
186     // Use a convenience variable to track the current context.
187     s_CurrentContextPtr = context;
188 }
189 
190 
191 // This function is called during copying of the Seq-entry, at the start
192 // of the Bioseq or Bioseq-set, and merely progresses through the sequence of
193 // contexts.
ContextEnter(void)194 void ContextEnter(void)
195 {
196     if (s_ContextSequence.end() == s_ContextSequenceIter) {
197         s_ContextSequenceIter = s_ContextSequence.begin();
198     } else {
199         ++s_ContextSequenceIter;
200     }
201     s_CurrentContextPtr = *s_ContextSequenceIter;
202 }
203 
// Called while the Seq-entry is being copied, at the end of a Bioseq or
// Bioseq-set.  It is the counterpart of ContextEnter() and exists for
// symmetry only: this implementation has nothing to do here, although a
// more advanced splicing algorithm might.
void ContextLeave(void)
{
    // Intentionally empty.
}
211 
212 
213 // This function is called after preprocessing of the Seq-entry, and prepares
214 // for copying.
ContextInit(void)215 void ContextInit(void)
216 {
217     // Indicate that no context has been entered yet.
218     s_ContextSequenceIter = s_ContextSequence.end();
219     s_CurrentContextPtr = NULL;
220 }
221 
222 
223 // This function just records that the current context contains Seq-annot's.
CurrentContextContainsSeqAnnots(void)224 void CurrentContextContainsSeqAnnots(void)
225 {
226     s_CurrentContextPtr->has_annots = true;
227 }
228 
229 
230 // This function translates format names to enum values.
GetFormat(const string & name)231 ESerialDataFormat GetFormat(const string& name)
232 {
233     if (name == "asn") {
234         return eSerial_AsnText;
235     } else if (name == "asnb") {
236         return eSerial_AsnBinary;
237     } else if (name == "xml") {
238         return eSerial_Xml;
239     } else if (name == "json") {
240         return eSerial_Json;
241     } else {
242         // Should be caught by argument processing, but in case of a
243         // programming error...
244         NCBI_THROW(CException, eUnknown, "Bad serial format name " + name);
245     }
246 }
247 
248 
249 // These functions determine if the given Seq-annot choice type matches the
250 // user selection.
IsSeqAnnotChoiceSelected(TSeqAnnotChoiceMaskFlags flags)251 bool IsSeqAnnotChoiceSelected(TSeqAnnotChoiceMaskFlags flags)
252 {
253     return (flags & s_SeqAnnotChoiceMask) != 0;
254 }
IsSeqAnnotChoiceSelected(const CSeq_annot * annot)255 bool IsSeqAnnotChoiceSelected(const CSeq_annot* annot)
256 {
257     return (s_GetSeqAnnotChoiceMask(annot) & s_SeqAnnotChoiceMask) != 0;
258 }
259 
260 
261 // These functions determine if the given Seq-id choice type matches the
262 // user selection.
IsSeqIdChoiceSelected(TSeqIdChoiceMaskFlags flags)263 bool IsSeqIdChoiceSelected(TSeqIdChoiceMaskFlags flags)
264 {
265     return (flags & s_SeqIdChoiceMask) != 0;
266 }
IsSeqIdChoiceSelected(const CSeq_id * seqid)267 bool IsSeqIdChoiceSelected(const CSeq_id* seqid)
268 {
269     return (s_GetSeqIdChoiceMask(seqid) & s_SeqIdChoiceMask) != 0;
270 }
271 
272 
273 // This function is called during the copying of a Seq-annot, and (after
274 // all existing Seq-annot's are copied) splices in any applicable new
275 // Seq-annot's.
ProcessSeqEntryAnnot(unique_ptr<CObjectIStream> & sai,COStreamContainer & osc)276 void ProcessSeqEntryAnnot(unique_ptr<CObjectIStream>& sai,
277                           COStreamContainer& osc)
278 {
279     // Loop through all Seq-id's for this context, and splice appropriate
280     // Seq-annot's.
281     for (const auto& seqid_from_se : s_CurrentContextPtr->seqids) {
282         // See if the Seq-id from the Seq-entry was also in
283         // the Seq-annot's.
284         TSeqIdToAnnotMapIter
285             seqid_in_map_iter = s_SeqIdToAnnotMap.find(seqid_from_se);
286         if (seqid_in_map_iter != s_SeqIdToAnnotMap.end()) {
287             // The current Seq-id for the current Seq-annot for
288             // the current Seq-entry being read may be contained
289             // by a number of in the Seq-annot file.
290             // Find all such Seq-annot's and splice them.
291             s_SpliceAnnotsForSeqId(sai, osc, seqid_in_map_iter);
292 
293             // Track stats.
294             g_Stats->SeqEntry_Changed();
295         } else {
296             // No Seq-annot's need to be spliced for this Seq-id.
297             // Either the Seq-annot file contained no Seq-annot's
298             // that contained this Seq-id, or some Seq-annot that
299             // contains this Seq-id was already spliced, and so
300             // this Seq-id has been removed from the mapping.
301         }
302     }
303 }
304 
305 
306 // This function is called between Seq-entry's, so all context info is reset.
ResetSeqEntryProcessing(void)307 void ResetSeqEntryProcessing(void)
308 {
309     if (NULL != s_RootContextPtr) {
310         s_DeleteContext(s_RootContextPtr);
311     }
312     _ASSERT (s_ContextPtrStack.empty());
313     s_ContextSequence.clear();
314     s_CurrentContextPtr = NULL;
315     s_RootContextPtr = NULL;
316 }
317 
318 
319 // This function associates this Seq-id with the containing Seq-annot.
320 // The mappings created are highly dependent on the splicing algorithm
321 // and may need to be changed for a real Seq-annot splicing application.
SeqAnnotMapSeqId(TSeqRef seqid_in_annot)322 void SeqAnnotMapSeqId(TSeqRef seqid_in_annot)
323 {
324     // Only map selected Seq-id CHOICE types.
325     if ( ! IsSeqIdChoiceSelected(&*seqid_in_annot) ) {
326         return;
327     }
328 
329     /////////////////////////////////////////////////////////
330     // Forward mapping
331 
332     // For this Seq-id, see if it has been mapped to any
333     // annotations yet.  If yes, then make sure this position
334     // is included in the list.  If not, then insert a map
335     // entry with a new vector containing this position.
336     {{
337         // mapentry finds mapping, if one exists:
338         TSeqIdToAnnotMapIter mapentry = s_SeqIdToAnnotMap.find(seqid_in_annot);
339 
340         if (mapentry != s_SeqIdToAnnotMap.end()) {
341             // a mapping exists, so see if the current
342             // position is in the position list
343             TPosContIter
344                 pos = find(mapentry->second->GetData().begin(),
345                            mapentry->second->GetData().end(),
346                            s_AnnotPos);
347             if (pos == mapentry->second->GetData().end()) {
348                 // current position not found in list, so add it
349                 mapentry->second->GetData().push_back(s_AnnotPos);
350             } else {
351                 //do nothing since position already in list
352             }
353         } else {
354             // this Seq-id hasn't been mapped yet, so add a new
355             // map entry with just this position
356             TPosContRef newcont(new TPosContOb);
357             newcont->GetData().push_back(s_AnnotPos);
358             s_SeqIdToAnnotMap.insert(
359                 pair<TSeqRef, TPosContRef>(seqid_in_annot, newcont));
360         }
361     }}
362 
363     /////////////////////////////////////////////////////////
364     // Backward mapping
365 
366     // For this Seq-annot, see if it has been mapped to any
367     // Seq-id's yet.  If yes, then make sure this Seq-id
368     // is included in the list.  If not, then insert a map
369     // entry with a new vector containing this Seq-id.
370     {{
371         // mapentry finds mapping, if one exists:
372         TAnnotToSeqIdMapIter mapentry = s_AnnotToSeqIdMap.find(s_AnnotPos);
373 
374         if (mapentry != s_AnnotToSeqIdMap.end()) {
375             // a mapping exists, so see if the current Seq-id is
376             // in the Seq-id list
377             TSeqRefContIter
378                 seqid = find(mapentry->second->GetData().begin(),
379                              mapentry->second->GetData().end(),
380                              seqid_in_annot);
381             if (seqid == mapentry->second->GetData().end()) {
382                 // current Seq-id not found in list, so add it
383                 mapentry->second->GetData().push_back(seqid_in_annot);
384             } // else do nothing since Seq-id already in list
385         } else {
386             // this position hasn't been mapped yet, so add a new
387             // map entry with just this Seq-id
388             TSeqRefContRef newvec(new TSeqRefContOb);
389             newvec->GetData().push_back(seqid_in_annot);
390             s_AnnotToSeqIdMap.insert(
391                 pair<CNcbiStreampos, TSeqRefContRef>(s_AnnotPos, newvec));
392         }
393     }}
394 }
395 
396 
397 // This function does any needed preprocessing before skipping a Seq-annot.
SeqAnnotSet_Pre(CObjectIStream & in)398 void SeqAnnotSet_Pre(CObjectIStream& in)
399 {
400     // The file contains full Seq-annot's (including header) but we don't
401     // need to read the header in subsequent operations, so just skip it.
402     in.SkipFileHeader(CType<CSeq_annot>().GetTypeInfo());
403 
404     // Record where in the input stream the data for this Seq-annot starts.
405     s_AnnotPos = in.GetStreamPos();
406 }
407 
408 
409 // These functions record the user selection for the Seq-annot choice type.
SetSeqAnnotChoiceMask(const string & mask)410 void SetSeqAnnotChoiceMask(const string& mask)
411 {
412     if (mask == "Default") {
413         SetSeqAnnotChoiceMask(fSAMF_Default);
414         return;
415     } else if (mask == "All") {
416         SetSeqAnnotChoiceMask(fSAMF_All);
417         return;
418     }
419 
420     list<string> flag_list;
421     NStr::Split(mask, "|", flag_list, NStr::fSplit_MergeDelimiters | NStr::fSplit_Truncate);
422     list<string>::iterator fbegin(flag_list.begin());
423     list<string>::iterator fend(flag_list.end());
424     list<string>::iterator found_flag;
425     TSeqAnnotChoiceMaskFlags flags = fSAMF_NotSet;
426 
427 #define ADDFLAG(flag) \
428     if ((found_flag = find(fbegin,fend,#flag)) != fend) flags |= fSAMF_##flag;
429 
430     ADDFLAG(Ftable);
431     ADDFLAG(Align);
432     ADDFLAG(Graph);
433     ADDFLAG(Ids);
434     ADDFLAG(Locs);
435     ADDFLAG(Seq_table);
436 
437 #undef ADDFLAG
438 
439     SetSeqAnnotChoiceMask(flags);
440 }
SetSeqAnnotChoiceMask(const TSeqAnnotChoiceMaskFlags mask)441 void SetSeqAnnotChoiceMask(const TSeqAnnotChoiceMaskFlags mask)
442 {
443     s_SeqAnnotChoiceMask = mask;
444 }
445 
446 
447 // These functions record the user selection for the Seq-id choice type.
SetSeqIdChoiceMask(const string & mask)448 void SetSeqIdChoiceMask(const string& mask)
449 {
450     if (mask == "Default") {
451         SetSeqIdChoiceMask(fSIMF_Default);
452         return;
453     } else if (mask == "All") {
454         SetSeqIdChoiceMask(fSIMF_All);
455         return;
456     } else if (mask == "AllButLocal") {
457         SetSeqIdChoiceMask(fSIMF_AllButLocal);
458         return;
459     }
460 
461     list<string> flag_list;
462     NStr::Split(mask, "|", flag_list, NStr::fSplit_MergeDelimiters | NStr::fSplit_Truncate);
463     list<string>::iterator fbegin(flag_list.begin());
464     list<string>::iterator fend(flag_list.end());
465     list<string>::iterator found_flag;
466     TSeqIdChoiceMaskFlags flags = fSIMF_NotSet;
467 
468 #define ADDFLAG(flag) \
469     if ((found_flag = find(fbegin,fend,#flag)) != fend) flags |= fSIMF_##flag;
470 
471     ADDFLAG(Local);
472     ADDFLAG(Gibbsq);
473     ADDFLAG(Gibbmt);
474     ADDFLAG(Giim);
475     ADDFLAG(Genbank);
476     ADDFLAG(Embl);
477     ADDFLAG(Pir);
478     ADDFLAG(Swissprot);
479     ADDFLAG(Patent);
480     ADDFLAG(Other);
481     ADDFLAG(General);
482     ADDFLAG(Gi);
483     ADDFLAG(Ddbj);
484     ADDFLAG(Prf);
485     ADDFLAG(Pdb);
486     ADDFLAG(Tpg);
487     ADDFLAG(Tpe);
488     ADDFLAG(Tpd);
489     ADDFLAG(Gpipe);
490     ADDFLAG(Named_annot_track);
491 
492 #undef ADDFLAG
493 
494     SetSeqIdChoiceMask(flags);
495 }
SetSeqIdChoiceMask(const TSeqIdChoiceMaskFlags mask)496 void SetSeqIdChoiceMask(const TSeqIdChoiceMaskFlags mask)
497 {
498     s_SeqIdChoiceMask = mask;
499 }
500 
501 
502 ///////////////////////////////////////////////////////////////////////////
503 // Module static functions
504 
505 // This function deletes a context.
s_DeleteContext(TContextPtr context)506 static void s_DeleteContext(TContextPtr context)
507 {
508     while ( ! context->sub_contexts.empty()) {
509         s_DeleteContext(context->sub_contexts.back());
510         context->sub_contexts.pop_back();
511     }
512     delete context;
513 }
514 
515 
// The following function could be used for debugging.
#if 0
// Prints the given context and, recursively, its sub-contexts to cout.
// Each nesting level is indented by four more spaces.
static void s_DumpContext(TContextPtr context)
{
    // Current nesting depth, shared by all levels of the recursion.
    static int indent = 0;

    const string outer  = string(4 * indent, ' ');
    const string inner  = string(4 * (indent+1), ' ');
    const string inmost = string(4 * (indent+2), ' ');

    cout << outer << "Context {" << endl;
    cout << inner << "type: " << context->type << endl;
    cout << inner << "has_annots: " << context->has_annots << endl;
    if (context->seqids.empty()) {
        cout << inner << "seqids: (none)" << endl;
    } else {
        cout << inner << "seqids: " << endl;
        for(const auto& seqid : context->seqids) {
            cout << inmost << "seqid: " << seqid->GetSeqIdString(true) << endl;
        }
    }

    // Sub-contexts are printed one level deeper.
    ++indent;
    for(const auto& child : context->sub_contexts) {
        s_DumpContext(child);
    }
    --indent;

    cout << outer << "}" << endl;
}
#endif
547 
548 
549 // This function returns the user selection for the Seq-annot choice type.
s_GetSeqAnnotChoiceMask(const CSeq_annot * annot)550 static TSeqAnnotChoiceMaskFlags s_GetSeqAnnotChoiceMask(const CSeq_annot* annot)
551 {
552     if ( ! annot->CanGetData() || ! annot->IsSetData() ) {
553         return fSAMF_NotSet;
554     }
555     switch (annot->GetData().Which()) {
556         case CSeq_annot_Base::C_Data::e_not_set:    return fSAMF_NotSet;
557         case CSeq_annot_Base::C_Data::e_Ftable:     return fSAMF_Ftable;
558         case CSeq_annot_Base::C_Data::e_Align:      return fSAMF_Align;
559         case CSeq_annot_Base::C_Data::e_Graph:      return fSAMF_Graph;
560         case CSeq_annot_Base::C_Data::e_Ids:        return fSAMF_Ids;
561         case CSeq_annot_Base::C_Data::e_Locs:       return fSAMF_Locs;
562         case CSeq_annot_Base::C_Data::e_Seq_table:  return fSAMF_Seq_table;
563         default: {
564             NCBI_THROW(CException, eUnknown, "Unexpected Seq-annot mask");
565         }
566     }
567 }
568 
569 
570 // This function returns the user selection for the Seq-id choice type.
s_GetSeqIdChoiceMask(const CSeq_id * seqid)571 static TSeqIdChoiceMaskFlags s_GetSeqIdChoiceMask(const CSeq_id* seqid)
572 {
573     switch (seqid->Which()) {
574         case CSeq_id_Base::e_not_set:           return fSIMF_NotSet;
575         case CSeq_id_Base::e_Local:             return fSIMF_Local;
576         case CSeq_id_Base::e_Gibbsq:            return fSIMF_Gibbsq;
577         case CSeq_id_Base::e_Gibbmt:            return fSIMF_Gibbmt;
578         case CSeq_id_Base::e_Giim:              return fSIMF_Giim;
579         case CSeq_id_Base::e_Genbank:           return fSIMF_Genbank;
580         case CSeq_id_Base::e_Embl:              return fSIMF_Embl;
581         case CSeq_id_Base::e_Pir:               return fSIMF_Pir;
582         case CSeq_id_Base::e_Swissprot:         return fSIMF_Swissprot;
583         case CSeq_id_Base::e_Patent:            return fSIMF_Patent;
584         case CSeq_id_Base::e_Other:             return fSIMF_Other;
585         case CSeq_id_Base::e_General:           return fSIMF_General;
586         case CSeq_id_Base::e_Gi:                return fSIMF_Gi;
587         case CSeq_id_Base::e_Ddbj:              return fSIMF_Ddbj;
588         case CSeq_id_Base::e_Prf:               return fSIMF_Prf;
589         case CSeq_id_Base::e_Pdb:               return fSIMF_Pdb;
590         case CSeq_id_Base::e_Tpg:               return fSIMF_Tpg;
591         case CSeq_id_Base::e_Tpe:               return fSIMF_Tpe;
592         case CSeq_id_Base::e_Tpd:               return fSIMF_Tpd;
593         case CSeq_id_Base::e_Gpipe:             return fSIMF_Gpipe;
594         case CSeq_id_Base::e_Named_annot_track: return fSIMF_Named_annot_track;
595         default: {
596             NCBI_THROW(CException, eUnknown, "Unexpected Seq-id mask");
597         }
598     }
599 }
600 
601 
602 // This function removes a Seq-annot from both maps.
s_RemoveAnnot(TAnnotToSeqIdMapIter annot_in_map_iter)603 static bool s_RemoveAnnot(TAnnotToSeqIdMapIter annot_in_map_iter)
604 {
605     // Track whether or not the original container was removed.
606     bool container_removed = false;
607 
608     // Loop through all Seq-id's for this Seq-annot and remove their
609     // link back to this Seq-annot.
610     // Note: This loops through Seq-id's for this particular Seq-annot,
611     // not the Seq-id's in the global map.  It then looks up the
612     // corresponding Seq-id in the global map.
613     // This will erase elements from Seq-annot's container.
614 
615     TSeqRefCont& seq_list(annot_in_map_iter->second->GetData());
616     for (const auto& seq : seq_list) {
617         // Find the Seq-id in the global map that corresponds to the
618         // Seq-id in this Seq-annot's list.
619         TSeqIdToAnnotMapIter seqid_in_map = s_SeqIdToAnnotMap.find(seq);
620         _ASSERT(seqid_in_map != s_SeqIdToAnnotMap.end());
621 
622         // Find the link to this Seq-annot in this Seq-id's list.
623         TPosCont&    annot_list(seqid_in_map->second->GetData());
624         TPosContIter seqannot = find(annot_list.begin(), annot_list.end(), annot_in_map_iter->first);
625         _ASSERT(seqannot != annot_list.end());
626 
627         // Erase the link to this Seq-annot from this Seq-id.
628         // If this was the initial Seq-id, make sure the iterator doesn't
629         // get trashed.
630         annot_list.erase(seqannot);
631 
632         // Erase this Seq-id from the map if it no longer has any Seq-annot's.
633         if (annot_list.empty()) {
634             s_SeqIdToAnnotMap.erase(seqid_in_map);
635             container_removed = true;
636         }
637     }
638 
639     // Remove this Seq-annot from the map.
640     s_AnnotToSeqIdMap.erase(annot_in_map_iter);
641 
642     return container_removed;
643 }
644 
645 
646 // This function splices a Seq-annot from the Seq-annot stream to the
647 // new Seq-entry file, and calls the function that removes the Seq-annot
648 // from the mappings.
s_SpliceAnnot(unique_ptr<CObjectIStream> & sai,COStreamContainer & osc,TAnnotToSeqIdMapIter annot_in_map)649 static bool s_SpliceAnnot(unique_ptr<CObjectIStream>& sai,
650                           COStreamContainer& osc,
651                           TAnnotToSeqIdMapIter annot_in_map)
652 {
653     // Seek to to the start of this Seq-annot in the Seq-annot stream.
654     sai->SetStreamPos(annot_in_map->first);
655 
656     // Read Seq-annot locally (not saved outside this scope).
657     CRef<CSeq_annot> annot(new CSeq_annot);
658     sai->Read(&*annot, CType<CSeq_annot>().GetTypeInfo(),
659              CObjectIStream::eNoFileHeader);
660 
661     // Splice the Seq-annot.
662     osc << *annot;
663 
664     // Track stats.
665     g_Stats->SeqAnnot_Spliced();
666 
667     // Now we don't need to splice this Seq-annot for any other Seq-id's,
668     // so remove it from the maps.
669     return s_RemoveAnnot(annot_in_map);
670 }
671 
672 
673 // This function splices all the Seq-annot's for a given Seq-id.
s_SpliceAnnotsForSeqId(unique_ptr<CObjectIStream> & sai,COStreamContainer & osc,TSeqIdToAnnotMapIter seqid_iter)674 static void s_SpliceAnnotsForSeqId(unique_ptr<CObjectIStream>& sai,
675                                    COStreamContainer& osc,
676                                    TSeqIdToAnnotMapIter seqid_iter)
677 {
678     // Loop through all Seq-annot's for this Seq-id.
679     TPosCont&       pos_list = seqid_iter->second->GetData();
680     TPosContIter    pos, next_pos;
681     for ( pos = pos_list.begin(); pos != pos_list.end(); pos = next_pos ) {
682         // Save next iterator, in case this one gets removed by splicing.
683         next_pos = pos;
684         ++next_pos;
685 
686         // Find the Seq-annot in the map that corresponds to the Seq-annot
687         // in this Seq-id's list.
688         TAnnotToSeqIdMapIter annot_in_map = s_AnnotToSeqIdMap.find(*pos);
689         _ASSERT(annot_in_map != s_AnnotToSeqIdMap.end());
690 
691         // Splice (and remove) this Seq-annot.
692         // Removal will advance the iterator.  After the last Seq-annot for
693         // this Seq-id is removed, the Seq-id itself will be removed from
694         // the map (invalidating the local iterator), so the loop will be
695         // broken out of.
696         if (s_SpliceAnnot(sai, osc, annot_in_map)) {
697             break;
698         }
699     }
700 }
701