1 /* $Id: seqannot_splicer_util.cpp 607723 2020-05-06 18:48:03Z ivanov $
2 * ===========================================================================
3 *
4 * PUBLIC DOMAIN NOTICE
5 * National Center for Biotechnology Information
6 *
7 * This software/database is a "United States Government Work" under the
8 * terms of the United States Copyright Act. It was written as part of
9 * the author's official duties as a United States Government employee and
10 * thus cannot be copyrighted. This software/database is freely available
11 * to the public for use. The National Library of Medicine and the U.S.
12 * Government have not placed any restriction on its use or reproduction.
13 *
14 * Although all reasonable efforts have been taken to ensure the accuracy
15 * and reliability of the software and data, the NLM and the U.S.
16 * Government do not and cannot warrant the performance or results that
17 * may be obtained by using this software or data. The NLM and the U.S.
18 * Government disclaim all warranties, express or implied, including
19 * warranties of performance, merchantability or fitness for any particular
20 * purpose.
21 *
22 * Please cite the author in any work or product based on this material.
23 *
24 * ===========================================================================
25 *
26 * Author: David McElhany
27 *
28 * File Description:
29 * See main application file.
30 *
31 * ===========================================================================
32 */
33
34 #include <ncbi_pch.hpp>
35 #include <stack>
36
37 #include <corelib/ncbiexpt.hpp>
38 #include <corelib/ncbimisc.hpp>
39 #include <corelib/ncbiobj.hpp>
40
41 #include <objects/seq/Seq_annot.hpp>
42 #include <objects/seqloc/Seq_id.hpp>
43
44 #include <serial/objectio.hpp>
45 #include <serial/objistr.hpp>
46 #include <serial/serialdef.hpp>
47
48 #include "seqannot_splicer_stats.hpp"
49 #include "seqannot_splicer_util.hpp"
50
51 USING_SCOPE(ncbi);
52 USING_SCOPE(objects);
53
54
55 ///////////////////////////////////////////////////////////////////////////
56 // Typedefs
57
58 // TSeqRefCont _could_ be different than TIds, so give it its own type.
59 typedef list<TSeqRef> TSeqRefCont;
60 typedef list<CNcbiStreampos> TPosCont;
61
62 typedef CObjectFor<TSeqRefCont> TSeqRefContOb;
63 typedef CObjectFor<TPosCont> TPosContOb;
64
65 typedef CRef<TSeqRefContOb> TSeqRefContRef;
66 typedef CRef<TPosContOb> TPosContRef;
67
68 typedef set<TSeqRef,PPtrLess<TSeqRef> > TContextSeqIds;
69 typedef struct SContext_tag* TContextPtr;
70 typedef list<TContextPtr> TContextList;
71 typedef stack<TContextPtr> TContextPtrStack;
72
73 typedef map<CNcbiStreampos,TSeqRefContRef> TAnnotToSeqIdMap;
74 typedef map<TSeqRef,TPosContRef,PPtrLess<TSeqRef> > TSeqIdToAnnotMap;
75
76 typedef TAnnotToSeqIdMap::const_iterator TAnnotToSeqIdMapCIter;
77 typedef TAnnotToSeqIdMap::iterator TAnnotToSeqIdMapIter;
78 typedef TContextList::iterator TContextListIter;
79 typedef TPosCont::iterator TPosContIter;
80 typedef TSeqIdToAnnotMap::const_iterator TSeqIdToAnnotMapCIter;
81 typedef TSeqIdToAnnotMap::iterator TSeqIdToAnnotMapIter;
82 typedef TSeqRefCont::iterator TSeqRefContIter;
83
84 typedef struct SContext_tag {
85 EContextType type;
86 bool has_annots;
87 TContextSeqIds seqids;
88 TContextList sub_contexts;
89 } SContext;
90
91
92 ///////////////////////////////////////////////////////////////////////////
93 // Module static functions and objects
94
95 static void s_DeleteContext(TContextPtr context);
96
97 // The following function could be used for debugging.
98 #if 0
99 static void s_DumpContext(TContextPtr context);
100 #endif
101
102 static TSeqAnnotChoiceMaskFlags s_GetSeqAnnotChoiceMask(const CSeq_annot* annot);
103
104 static TSeqIdChoiceMaskFlags s_GetSeqIdChoiceMask(const CSeq_id* seqid);
105
106 static bool s_RemoveAnnot(TAnnotToSeqIdMapIter annot_in_map_iter);
107
108 static bool s_SpliceAnnot(unique_ptr<CObjectIStream>& sai,
109 COStreamContainer& osc,
110 TAnnotToSeqIdMapIter annot_in_map);
111
112 static void s_SpliceAnnotsForSeqId(unique_ptr<CObjectIStream>& sai,
113 COStreamContainer& osc,
114 TSeqIdToAnnotMapIter seqid_iter);
115
116
117 // These static global objects defined here for a test simplicity.
118 // DO NOT USE static non-POD types in your program!!!
119
120 static TAnnotToSeqIdMap s_AnnotToSeqIdMap;
121 static TSeqIdToAnnotMap s_SeqIdToAnnotMap;
122
123 static TContextPtr s_CurrentContextPtr = NULL;
124 static TContextPtr s_RootContextPtr = NULL;
125 static TContextList s_ContextSequence;
126 static TContextListIter s_ContextSequenceIter;
127 static TContextPtrStack s_ContextPtrStack;
128
129 static TSeqAnnotChoiceMaskFlags s_SeqAnnotChoiceMask = fSAMF_Default;
130 static TSeqIdChoiceMaskFlags s_SeqIdChoiceMask = fSIMF_Default;
131
132 static CNcbiStreampos s_AnnotPos;
133
134
135 ///////////////////////////////////////////////////////////////////////////
136 // Program utility functions
137
138 // This function is called during preprocessing of the Seq-entry and adds
139 // a given Seq-id to the current context.
AddSeqIdToCurrentContext(TSeqRef id)140 void AddSeqIdToCurrentContext(TSeqRef id)
141 {
142 // Only add selected Seq-id CHOICE types.
143 if (IsSeqIdChoiceSelected(id)) {
144 s_CurrentContextPtr->seqids.insert(id);
145 }
146 }
147
148
149 // This function is called during preprocessing of the Seq-entry, at the end
150 // of the Bioseq or Bioseq-set, and helps create the context tree.
ContextEnd(void)151 void ContextEnd(void)
152 {
153 s_ContextPtrStack.pop();
154 if (s_ContextPtrStack.empty()) {
155 s_CurrentContextPtr = NULL;
156 } else {
157 s_CurrentContextPtr = s_ContextPtrStack.top();
158 }
159 }
160
161 // This function is called during preprocessing of the Seq-entry, at the start
162 // of the Bioseq or Bioseq-set, and helps create the context tree.
ContextStart(CObjectIStream & in,EContextType type)163 void ContextStart(CObjectIStream& in, EContextType type)
164 {
165 // Create and populate new context.
166 TContextPtr context = new SContext;
167 context->type = type;
168 context->has_annots = false;
169 context->seqids.clear();
170 context->sub_contexts.clear();
171
172 // Insert the new context into the full context tree.
173 if (NULL == s_RootContextPtr) {
174 s_RootContextPtr = context;
175 } else {
176 s_CurrentContextPtr->sub_contexts.push_back(context);
177 }
178
179 // Use a stack to track the history of current context pointers.
180 s_ContextPtrStack.push(context);
181
182 // Keep a full chronological sequence of context pointers for use when
183 // copying.
184 s_ContextSequence.push_back(context);
185
186 // Use a convenience variable to track the current context.
187 s_CurrentContextPtr = context;
188 }
189
190
191 // This function is called during copying of the Seq-entry, at the start
192 // of the Bioseq or Bioseq-set, and merely progresses through the sequence of
193 // contexts.
ContextEnter(void)194 void ContextEnter(void)
195 {
196 if (s_ContextSequence.end() == s_ContextSequenceIter) {
197 s_ContextSequenceIter = s_ContextSequence.begin();
198 } else {
199 ++s_ContextSequenceIter;
200 }
201 s_CurrentContextPtr = *s_ContextSequenceIter;
202 }
203
// Called while copying the Seq-entry, at the end of a Bioseq or Bioseq-set.
// Intentionally a no-op here; it exists for symmetry with ContextEnter() and
// as an extension point for a more advanced splicing algorithm.
void ContextLeave(void)
{
    // Nothing to do.
}
211
212
213 // This function is called after preprocessing of the Seq-entry, and prepares
214 // for copying.
ContextInit(void)215 void ContextInit(void)
216 {
217 // Indicate that no context has been entered yet.
218 s_ContextSequenceIter = s_ContextSequence.end();
219 s_CurrentContextPtr = NULL;
220 }
221
222
223 // This function just records that the current context contains Seq-annot's.
CurrentContextContainsSeqAnnots(void)224 void CurrentContextContainsSeqAnnots(void)
225 {
226 s_CurrentContextPtr->has_annots = true;
227 }
228
229
230 // This function translates format names to enum values.
GetFormat(const string & name)231 ESerialDataFormat GetFormat(const string& name)
232 {
233 if (name == "asn") {
234 return eSerial_AsnText;
235 } else if (name == "asnb") {
236 return eSerial_AsnBinary;
237 } else if (name == "xml") {
238 return eSerial_Xml;
239 } else if (name == "json") {
240 return eSerial_Json;
241 } else {
242 // Should be caught by argument processing, but in case of a
243 // programming error...
244 NCBI_THROW(CException, eUnknown, "Bad serial format name " + name);
245 }
246 }
247
248
249 // These functions determine if the given Seq-annot choice type matches the
250 // user selection.
IsSeqAnnotChoiceSelected(TSeqAnnotChoiceMaskFlags flags)251 bool IsSeqAnnotChoiceSelected(TSeqAnnotChoiceMaskFlags flags)
252 {
253 return (flags & s_SeqAnnotChoiceMask) != 0;
254 }
IsSeqAnnotChoiceSelected(const CSeq_annot * annot)255 bool IsSeqAnnotChoiceSelected(const CSeq_annot* annot)
256 {
257 return (s_GetSeqAnnotChoiceMask(annot) & s_SeqAnnotChoiceMask) != 0;
258 }
259
260
261 // These functions determine if the given Seq-id choice type matches the
262 // user selection.
IsSeqIdChoiceSelected(TSeqIdChoiceMaskFlags flags)263 bool IsSeqIdChoiceSelected(TSeqIdChoiceMaskFlags flags)
264 {
265 return (flags & s_SeqIdChoiceMask) != 0;
266 }
IsSeqIdChoiceSelected(const CSeq_id * seqid)267 bool IsSeqIdChoiceSelected(const CSeq_id* seqid)
268 {
269 return (s_GetSeqIdChoiceMask(seqid) & s_SeqIdChoiceMask) != 0;
270 }
271
272
273 // This function is called during the copying of a Seq-annot, and (after
274 // all existing Seq-annot's are copied) splices in any applicable new
275 // Seq-annot's.
ProcessSeqEntryAnnot(unique_ptr<CObjectIStream> & sai,COStreamContainer & osc)276 void ProcessSeqEntryAnnot(unique_ptr<CObjectIStream>& sai,
277 COStreamContainer& osc)
278 {
279 // Loop through all Seq-id's for this context, and splice appropriate
280 // Seq-annot's.
281 for (const auto& seqid_from_se : s_CurrentContextPtr->seqids) {
282 // See if the Seq-id from the Seq-entry was also in
283 // the Seq-annot's.
284 TSeqIdToAnnotMapIter
285 seqid_in_map_iter = s_SeqIdToAnnotMap.find(seqid_from_se);
286 if (seqid_in_map_iter != s_SeqIdToAnnotMap.end()) {
287 // The current Seq-id for the current Seq-annot for
288 // the current Seq-entry being read may be contained
289 // by a number of in the Seq-annot file.
290 // Find all such Seq-annot's and splice them.
291 s_SpliceAnnotsForSeqId(sai, osc, seqid_in_map_iter);
292
293 // Track stats.
294 g_Stats->SeqEntry_Changed();
295 } else {
296 // No Seq-annot's need to be spliced for this Seq-id.
297 // Either the Seq-annot file contained no Seq-annot's
298 // that contained this Seq-id, or some Seq-annot that
299 // contains this Seq-id was already spliced, and so
300 // this Seq-id has been removed from the mapping.
301 }
302 }
303 }
304
305
306 // This function is called between Seq-entry's, so all context info is reset.
ResetSeqEntryProcessing(void)307 void ResetSeqEntryProcessing(void)
308 {
309 if (NULL != s_RootContextPtr) {
310 s_DeleteContext(s_RootContextPtr);
311 }
312 _ASSERT (s_ContextPtrStack.empty());
313 s_ContextSequence.clear();
314 s_CurrentContextPtr = NULL;
315 s_RootContextPtr = NULL;
316 }
317
318
319 // This function associates this Seq-id with the containing Seq-annot.
320 // The mappings created are highly dependent on the splicing algorithm
321 // and may need to be changed for a real Seq-annot splicing application.
SeqAnnotMapSeqId(TSeqRef seqid_in_annot)322 void SeqAnnotMapSeqId(TSeqRef seqid_in_annot)
323 {
324 // Only map selected Seq-id CHOICE types.
325 if ( ! IsSeqIdChoiceSelected(&*seqid_in_annot) ) {
326 return;
327 }
328
329 /////////////////////////////////////////////////////////
330 // Forward mapping
331
332 // For this Seq-id, see if it has been mapped to any
333 // annotations yet. If yes, then make sure this position
334 // is included in the list. If not, then insert a map
335 // entry with a new vector containing this position.
336 {{
337 // mapentry finds mapping, if one exists:
338 TSeqIdToAnnotMapIter mapentry = s_SeqIdToAnnotMap.find(seqid_in_annot);
339
340 if (mapentry != s_SeqIdToAnnotMap.end()) {
341 // a mapping exists, so see if the current
342 // position is in the position list
343 TPosContIter
344 pos = find(mapentry->second->GetData().begin(),
345 mapentry->second->GetData().end(),
346 s_AnnotPos);
347 if (pos == mapentry->second->GetData().end()) {
348 // current position not found in list, so add it
349 mapentry->second->GetData().push_back(s_AnnotPos);
350 } else {
351 //do nothing since position already in list
352 }
353 } else {
354 // this Seq-id hasn't been mapped yet, so add a new
355 // map entry with just this position
356 TPosContRef newcont(new TPosContOb);
357 newcont->GetData().push_back(s_AnnotPos);
358 s_SeqIdToAnnotMap.insert(
359 pair<TSeqRef, TPosContRef>(seqid_in_annot, newcont));
360 }
361 }}
362
363 /////////////////////////////////////////////////////////
364 // Backward mapping
365
366 // For this Seq-annot, see if it has been mapped to any
367 // Seq-id's yet. If yes, then make sure this Seq-id
368 // is included in the list. If not, then insert a map
369 // entry with a new vector containing this Seq-id.
370 {{
371 // mapentry finds mapping, if one exists:
372 TAnnotToSeqIdMapIter mapentry = s_AnnotToSeqIdMap.find(s_AnnotPos);
373
374 if (mapentry != s_AnnotToSeqIdMap.end()) {
375 // a mapping exists, so see if the current Seq-id is
376 // in the Seq-id list
377 TSeqRefContIter
378 seqid = find(mapentry->second->GetData().begin(),
379 mapentry->second->GetData().end(),
380 seqid_in_annot);
381 if (seqid == mapentry->second->GetData().end()) {
382 // current Seq-id not found in list, so add it
383 mapentry->second->GetData().push_back(seqid_in_annot);
384 } // else do nothing since Seq-id already in list
385 } else {
386 // this position hasn't been mapped yet, so add a new
387 // map entry with just this Seq-id
388 TSeqRefContRef newvec(new TSeqRefContOb);
389 newvec->GetData().push_back(seqid_in_annot);
390 s_AnnotToSeqIdMap.insert(
391 pair<CNcbiStreampos, TSeqRefContRef>(s_AnnotPos, newvec));
392 }
393 }}
394 }
395
396
397 // This function does any needed preprocessing before skipping a Seq-annot.
SeqAnnotSet_Pre(CObjectIStream & in)398 void SeqAnnotSet_Pre(CObjectIStream& in)
399 {
400 // The file contains full Seq-annot's (including header) but we don't
401 // need to read the header in subsequent operations, so just skip it.
402 in.SkipFileHeader(CType<CSeq_annot>().GetTypeInfo());
403
404 // Record where in the input stream the data for this Seq-annot starts.
405 s_AnnotPos = in.GetStreamPos();
406 }
407
408
409 // These functions record the user selection for the Seq-annot choice type.
SetSeqAnnotChoiceMask(const string & mask)410 void SetSeqAnnotChoiceMask(const string& mask)
411 {
412 if (mask == "Default") {
413 SetSeqAnnotChoiceMask(fSAMF_Default);
414 return;
415 } else if (mask == "All") {
416 SetSeqAnnotChoiceMask(fSAMF_All);
417 return;
418 }
419
420 list<string> flag_list;
421 NStr::Split(mask, "|", flag_list, NStr::fSplit_MergeDelimiters | NStr::fSplit_Truncate);
422 list<string>::iterator fbegin(flag_list.begin());
423 list<string>::iterator fend(flag_list.end());
424 list<string>::iterator found_flag;
425 TSeqAnnotChoiceMaskFlags flags = fSAMF_NotSet;
426
427 #define ADDFLAG(flag) \
428 if ((found_flag = find(fbegin,fend,#flag)) != fend) flags |= fSAMF_##flag;
429
430 ADDFLAG(Ftable);
431 ADDFLAG(Align);
432 ADDFLAG(Graph);
433 ADDFLAG(Ids);
434 ADDFLAG(Locs);
435 ADDFLAG(Seq_table);
436
437 #undef ADDFLAG
438
439 SetSeqAnnotChoiceMask(flags);
440 }
SetSeqAnnotChoiceMask(const TSeqAnnotChoiceMaskFlags mask)441 void SetSeqAnnotChoiceMask(const TSeqAnnotChoiceMaskFlags mask)
442 {
443 s_SeqAnnotChoiceMask = mask;
444 }
445
446
447 // These functions record the user selection for the Seq-id choice type.
SetSeqIdChoiceMask(const string & mask)448 void SetSeqIdChoiceMask(const string& mask)
449 {
450 if (mask == "Default") {
451 SetSeqIdChoiceMask(fSIMF_Default);
452 return;
453 } else if (mask == "All") {
454 SetSeqIdChoiceMask(fSIMF_All);
455 return;
456 } else if (mask == "AllButLocal") {
457 SetSeqIdChoiceMask(fSIMF_AllButLocal);
458 return;
459 }
460
461 list<string> flag_list;
462 NStr::Split(mask, "|", flag_list, NStr::fSplit_MergeDelimiters | NStr::fSplit_Truncate);
463 list<string>::iterator fbegin(flag_list.begin());
464 list<string>::iterator fend(flag_list.end());
465 list<string>::iterator found_flag;
466 TSeqIdChoiceMaskFlags flags = fSIMF_NotSet;
467
468 #define ADDFLAG(flag) \
469 if ((found_flag = find(fbegin,fend,#flag)) != fend) flags |= fSIMF_##flag;
470
471 ADDFLAG(Local);
472 ADDFLAG(Gibbsq);
473 ADDFLAG(Gibbmt);
474 ADDFLAG(Giim);
475 ADDFLAG(Genbank);
476 ADDFLAG(Embl);
477 ADDFLAG(Pir);
478 ADDFLAG(Swissprot);
479 ADDFLAG(Patent);
480 ADDFLAG(Other);
481 ADDFLAG(General);
482 ADDFLAG(Gi);
483 ADDFLAG(Ddbj);
484 ADDFLAG(Prf);
485 ADDFLAG(Pdb);
486 ADDFLAG(Tpg);
487 ADDFLAG(Tpe);
488 ADDFLAG(Tpd);
489 ADDFLAG(Gpipe);
490 ADDFLAG(Named_annot_track);
491
492 #undef ADDFLAG
493
494 SetSeqIdChoiceMask(flags);
495 }
SetSeqIdChoiceMask(const TSeqIdChoiceMaskFlags mask)496 void SetSeqIdChoiceMask(const TSeqIdChoiceMaskFlags mask)
497 {
498 s_SeqIdChoiceMask = mask;
499 }
500
501
502 ///////////////////////////////////////////////////////////////////////////
503 // Module static functions
504
505 // This function deletes a context.
s_DeleteContext(TContextPtr context)506 static void s_DeleteContext(TContextPtr context)
507 {
508 while ( ! context->sub_contexts.empty()) {
509 s_DeleteContext(context->sub_contexts.back());
510 context->sub_contexts.pop_back();
511 }
512 delete context;
513 }
514
515
// The following function could be used for debugging.
#if 0
// Prints the context tree rooted at the given context to cout, indenting
// each nesting level by four spaces.
static void s_DumpContext(TContextPtr context)
{
    // Current nesting depth, shared across the recursive calls.
    static int indent = 0;

    const string outer  = string(4 * indent, ' ');
    const string inner  = string(4 * (indent+1), ' ');
    const string inner2 = string(4 * (indent+2), ' ');

    cout << outer << "Context {" << endl;
    cout << inner << "type: " << context->type << endl;
    cout << inner << "has_annots: " << context->has_annots << endl;

    if (context->seqids.size() > 0) {
        cout << inner << "seqids: " << endl;
        for (const auto& id : context->seqids) {
            cout << inner2 << "seqid: " << id->GetSeqIdString(true) << endl;
        }
    } else {
        cout << inner << "seqids: (none)" << endl;
    }

    // Sub-contexts are printed one level deeper.
    ++indent;
    for (const auto& sub : context->sub_contexts) {
        s_DumpContext(sub);
    }
    --indent;

    cout << outer << "}" << endl;
}
#endif
547
548
549 // This function returns the user selection for the Seq-annot choice type.
s_GetSeqAnnotChoiceMask(const CSeq_annot * annot)550 static TSeqAnnotChoiceMaskFlags s_GetSeqAnnotChoiceMask(const CSeq_annot* annot)
551 {
552 if ( ! annot->CanGetData() || ! annot->IsSetData() ) {
553 return fSAMF_NotSet;
554 }
555 switch (annot->GetData().Which()) {
556 case CSeq_annot_Base::C_Data::e_not_set: return fSAMF_NotSet;
557 case CSeq_annot_Base::C_Data::e_Ftable: return fSAMF_Ftable;
558 case CSeq_annot_Base::C_Data::e_Align: return fSAMF_Align;
559 case CSeq_annot_Base::C_Data::e_Graph: return fSAMF_Graph;
560 case CSeq_annot_Base::C_Data::e_Ids: return fSAMF_Ids;
561 case CSeq_annot_Base::C_Data::e_Locs: return fSAMF_Locs;
562 case CSeq_annot_Base::C_Data::e_Seq_table: return fSAMF_Seq_table;
563 default: {
564 NCBI_THROW(CException, eUnknown, "Unexpected Seq-annot mask");
565 }
566 }
567 }
568
569
570 // This function returns the user selection for the Seq-id choice type.
s_GetSeqIdChoiceMask(const CSeq_id * seqid)571 static TSeqIdChoiceMaskFlags s_GetSeqIdChoiceMask(const CSeq_id* seqid)
572 {
573 switch (seqid->Which()) {
574 case CSeq_id_Base::e_not_set: return fSIMF_NotSet;
575 case CSeq_id_Base::e_Local: return fSIMF_Local;
576 case CSeq_id_Base::e_Gibbsq: return fSIMF_Gibbsq;
577 case CSeq_id_Base::e_Gibbmt: return fSIMF_Gibbmt;
578 case CSeq_id_Base::e_Giim: return fSIMF_Giim;
579 case CSeq_id_Base::e_Genbank: return fSIMF_Genbank;
580 case CSeq_id_Base::e_Embl: return fSIMF_Embl;
581 case CSeq_id_Base::e_Pir: return fSIMF_Pir;
582 case CSeq_id_Base::e_Swissprot: return fSIMF_Swissprot;
583 case CSeq_id_Base::e_Patent: return fSIMF_Patent;
584 case CSeq_id_Base::e_Other: return fSIMF_Other;
585 case CSeq_id_Base::e_General: return fSIMF_General;
586 case CSeq_id_Base::e_Gi: return fSIMF_Gi;
587 case CSeq_id_Base::e_Ddbj: return fSIMF_Ddbj;
588 case CSeq_id_Base::e_Prf: return fSIMF_Prf;
589 case CSeq_id_Base::e_Pdb: return fSIMF_Pdb;
590 case CSeq_id_Base::e_Tpg: return fSIMF_Tpg;
591 case CSeq_id_Base::e_Tpe: return fSIMF_Tpe;
592 case CSeq_id_Base::e_Tpd: return fSIMF_Tpd;
593 case CSeq_id_Base::e_Gpipe: return fSIMF_Gpipe;
594 case CSeq_id_Base::e_Named_annot_track: return fSIMF_Named_annot_track;
595 default: {
596 NCBI_THROW(CException, eUnknown, "Unexpected Seq-id mask");
597 }
598 }
599 }
600
601
602 // This function removes a Seq-annot from both maps.
s_RemoveAnnot(TAnnotToSeqIdMapIter annot_in_map_iter)603 static bool s_RemoveAnnot(TAnnotToSeqIdMapIter annot_in_map_iter)
604 {
605 // Track whether or not the original container was removed.
606 bool container_removed = false;
607
608 // Loop through all Seq-id's for this Seq-annot and remove their
609 // link back to this Seq-annot.
610 // Note: This loops through Seq-id's for this particular Seq-annot,
611 // not the Seq-id's in the global map. It then looks up the
612 // corresponding Seq-id in the global map.
613 // This will erase elements from Seq-annot's container.
614
615 TSeqRefCont& seq_list(annot_in_map_iter->second->GetData());
616 for (const auto& seq : seq_list) {
617 // Find the Seq-id in the global map that corresponds to the
618 // Seq-id in this Seq-annot's list.
619 TSeqIdToAnnotMapIter seqid_in_map = s_SeqIdToAnnotMap.find(seq);
620 _ASSERT(seqid_in_map != s_SeqIdToAnnotMap.end());
621
622 // Find the link to this Seq-annot in this Seq-id's list.
623 TPosCont& annot_list(seqid_in_map->second->GetData());
624 TPosContIter seqannot = find(annot_list.begin(), annot_list.end(), annot_in_map_iter->first);
625 _ASSERT(seqannot != annot_list.end());
626
627 // Erase the link to this Seq-annot from this Seq-id.
628 // If this was the initial Seq-id, make sure the iterator doesn't
629 // get trashed.
630 annot_list.erase(seqannot);
631
632 // Erase this Seq-id from the map if it no longer has any Seq-annot's.
633 if (annot_list.empty()) {
634 s_SeqIdToAnnotMap.erase(seqid_in_map);
635 container_removed = true;
636 }
637 }
638
639 // Remove this Seq-annot from the map.
640 s_AnnotToSeqIdMap.erase(annot_in_map_iter);
641
642 return container_removed;
643 }
644
645
646 // This function splices a Seq-annot from the Seq-annot stream to the
647 // new Seq-entry file, and calls the function that removes the Seq-annot
648 // from the mappings.
s_SpliceAnnot(unique_ptr<CObjectIStream> & sai,COStreamContainer & osc,TAnnotToSeqIdMapIter annot_in_map)649 static bool s_SpliceAnnot(unique_ptr<CObjectIStream>& sai,
650 COStreamContainer& osc,
651 TAnnotToSeqIdMapIter annot_in_map)
652 {
653 // Seek to to the start of this Seq-annot in the Seq-annot stream.
654 sai->SetStreamPos(annot_in_map->first);
655
656 // Read Seq-annot locally (not saved outside this scope).
657 CRef<CSeq_annot> annot(new CSeq_annot);
658 sai->Read(&*annot, CType<CSeq_annot>().GetTypeInfo(),
659 CObjectIStream::eNoFileHeader);
660
661 // Splice the Seq-annot.
662 osc << *annot;
663
664 // Track stats.
665 g_Stats->SeqAnnot_Spliced();
666
667 // Now we don't need to splice this Seq-annot for any other Seq-id's,
668 // so remove it from the maps.
669 return s_RemoveAnnot(annot_in_map);
670 }
671
672
673 // This function splices all the Seq-annot's for a given Seq-id.
s_SpliceAnnotsForSeqId(unique_ptr<CObjectIStream> & sai,COStreamContainer & osc,TSeqIdToAnnotMapIter seqid_iter)674 static void s_SpliceAnnotsForSeqId(unique_ptr<CObjectIStream>& sai,
675 COStreamContainer& osc,
676 TSeqIdToAnnotMapIter seqid_iter)
677 {
678 // Loop through all Seq-annot's for this Seq-id.
679 TPosCont& pos_list = seqid_iter->second->GetData();
680 TPosContIter pos, next_pos;
681 for ( pos = pos_list.begin(); pos != pos_list.end(); pos = next_pos ) {
682 // Save next iterator, in case this one gets removed by splicing.
683 next_pos = pos;
684 ++next_pos;
685
686 // Find the Seq-annot in the map that corresponds to the Seq-annot
687 // in this Seq-id's list.
688 TAnnotToSeqIdMapIter annot_in_map = s_AnnotToSeqIdMap.find(*pos);
689 _ASSERT(annot_in_map != s_AnnotToSeqIdMap.end());
690
691 // Splice (and remove) this Seq-annot.
692 // Removal will advance the iterator. After the last Seq-annot for
693 // this Seq-id is removed, the Seq-id itself will be removed from
694 // the map (invalidating the local iterator), so the loop will be
695 // broken out of.
696 if (s_SpliceAnnot(sai, osc, annot_in_map)) {
697 break;
698 }
699 }
700 }
701