1 /*  $Id: seq_id_tree.cpp 624726 2021-02-03 18:51:54Z ivanov $
2 * ===========================================================================
3 *
4 *                            PUBLIC DOMAIN NOTICE
5 *               National Center for Biotechnology Information
6 *
7 *  This software/database is a "United States Government Work" under the
8 *  terms of the United States Copyright Act.  It was written as part of
9 *  the author's official duties as a United States Government employee and
10 *  thus cannot be copyrighted.  This software/database is freely available
11 *  to the public for use. The National Library of Medicine and the U.S.
12 *  Government have not placed any restriction on its use or reproduction.
13 *
14 *  Although all reasonable efforts have been taken to ensure the accuracy
15 *  and reliability of the software and data, the NLM and the U.S.
16 *  Government do not and cannot warrant the performance or results that
17 *  may be obtained by using this software or data. The NLM and the U.S.
18 *  Government disclaim all warranties, express or implied, including
19 *  warranties of performance, merchantability or fitness for any particular
20 *  purpose.
21 *
22 *  Please cite the author in any work or product based on this material.
23 *
24 * ===========================================================================
25 *
26 * Author: Aleksey Grichenko, Eugene Vasilchenko
27 *
28 * File Description:
29 *   Seq-id mapper for Object Manager
30 *
31 */
32 
33 #include <ncbi_pch.hpp>
34 #include <objects/misc/error_codes.hpp>
35 #include <corelib/ncbi_param.hpp>
36 #include "seq_id_tree.hpp"
37 #include <objects/seq/seq_id_mapper.hpp>
38 #include <common/ncbi_sanitizers.h>
39 
40 
41 #define NCBI_USE_ERRCODE_X   Objects_SeqIdMap
42 
43 
44 BEGIN_NCBI_SCOPE
45 BEGIN_SCOPE(objects)
46 
// Uncomment the define below to make GetPackedSeqId() swap the cached Seq-id
// under a mutex instead of using the atomic CRef release/reset calls.
//#define NCBI_SLOW_ATOMIC_SWAP
#if defined(NCBI_SLOW_ATOMIC_SWAP)
// Mutex taken by GetPackedSeqId() in the slow-swap configuration.
DEFINE_STATIC_FAST_MUTEX(sx_GetSeqIdMutex);
#endif
51 
52 ////////////////////////////////////////////////////////////////////
53 //
54 //  CSeq_id_***_Tree::
55 //
56 //    Seq-id sub-type specific trees
57 //
58 
CSeq_id_Which_Tree(CSeq_id_Mapper * mapper)59 CSeq_id_Which_Tree::CSeq_id_Which_Tree(CSeq_id_Mapper* mapper)
60     : m_Mapper(mapper)
61 {
62     _ASSERT(mapper);
63 }
64 
65 
~CSeq_id_Which_Tree(void)66 CSeq_id_Which_Tree::~CSeq_id_Which_Tree(void)
67 {
68 }
69 
70 
HaveMatch(const CSeq_id_Handle &) const71 bool CSeq_id_Which_Tree::HaveMatch(const CSeq_id_Handle& ) const
72 {
73     return false; // Assume no matches by default
74 }
75 
76 
FindMatch(const CSeq_id_Handle & id,TSeq_id_MatchList & id_list) const77 void CSeq_id_Which_Tree::FindMatch(const CSeq_id_Handle& id,
78                                    TSeq_id_MatchList& id_list) const
79 {
80     id_list.insert(id); // only exact match by default
81 }
82 
83 
Match(const CSeq_id_Handle & h1,const CSeq_id_Handle & h2) const84 bool CSeq_id_Which_Tree::Match(const CSeq_id_Handle& h1,
85                                const CSeq_id_Handle& h2) const
86 {
87     if ( h1 == h2 ) {
88         return true;
89     }
90     if ( HaveMatch(h1) ) {
91         TSeq_id_MatchList id_list;
92         FindMatch(h1, id_list);
93         return id_list.find(h2) != id_list.end();
94     }
95     return false;
96 }
97 
98 
IsBetterVersion(const CSeq_id_Handle &,const CSeq_id_Handle &) const99 bool CSeq_id_Which_Tree::IsBetterVersion(const CSeq_id_Handle& /*h1*/,
100                                          const CSeq_id_Handle& /*h2*/) const
101 {
102     return false; // No id version by default
103 }
104 
105 
106 inline
CreateInfo(CSeq_id::E_Choice type)107 CSeq_id_Info* CSeq_id_Which_Tree::CreateInfo(CSeq_id::E_Choice type)
108 {
109     return new CSeq_id_Info(type, m_Mapper);
110 }
111 
112 
HaveReverseMatch(const CSeq_id_Handle &) const113 bool CSeq_id_Which_Tree::HaveReverseMatch(const CSeq_id_Handle& ) const
114 {
115     return false; // Assume no reverse matches by default
116 }
117 
118 
FindReverseMatch(const CSeq_id_Handle & id,TSeq_id_MatchList & id_list)119 void CSeq_id_Which_Tree::FindReverseMatch(const CSeq_id_Handle& id,
120                                           TSeq_id_MatchList& id_list)
121 {
122     id_list.insert(id);
123     return;
124 }
125 
126 
s_AssignObject_id(CObject_id & new_id,const CObject_id & old_id)127 static inline void s_AssignObject_id(CObject_id& new_id,
128                                      const CObject_id& old_id)
129 {
130     if ( old_id.IsStr() ) {
131         new_id.SetStr(old_id.GetStr());
132     }
133     else {
134         new_id.SetId(old_id.GetId());
135     }
136 }
137 
138 
s_AssignDbtag(CDbtag & new_id,const CDbtag & old_id)139 static inline void s_AssignDbtag(CDbtag& new_id,
140                                  const CDbtag& old_id)
141 {
142     new_id.SetDb(old_id.GetDb());
143     s_AssignObject_id(new_id.SetTag(), old_id.GetTag());
144 }
145 
146 
s_AssignTextseq_id(CTextseq_id & new_tid,const CTextseq_id & old_tid)147 static inline void s_AssignTextseq_id(CTextseq_id& new_tid,
148                                       const CTextseq_id& old_tid)
149 {
150     if (old_tid.IsSetAccession()) {
151         new_tid.SetAccession(old_tid.GetAccession());
152     }
153     if (old_tid.IsSetVersion()) {
154         new_tid.SetVersion(old_tid.GetVersion());
155     }
156     if (old_tid.IsSetName()) {
157         new_tid.SetName(old_tid.GetName());
158     }
159     if (old_tid.IsSetRelease()) {
160         new_tid.SetRelease(old_tid.GetRelease());
161     }
162 }
163 
164 
s_AssignSeq_id(CSeq_id & new_id,const CSeq_id & old_id)165 static inline void s_AssignSeq_id(CSeq_id& new_id,
166                                   const CSeq_id& old_id)
167 {
168     switch (old_id.Which()) {
169     case CSeq_id::e_Gi:
170         new_id.SetGi(old_id.GetGi());
171         break;
172 
173     case CSeq_id::e_Local:
174         s_AssignObject_id(new_id.SetLocal(), old_id.GetLocal());
175         break;
176 
177     case CSeq_id::e_General:
178         s_AssignDbtag(new_id.SetGeneral(), old_id.GetGeneral());
179         break;
180 
181     case CSeq_id::e_Other:
182         s_AssignTextseq_id(new_id.SetOther(), old_id.GetOther());
183         break;
184 
185     case CSeq_id::e_Genbank:
186         s_AssignTextseq_id(new_id.SetGenbank(), old_id.GetGenbank());
187         break;
188 
189     case CSeq_id::e_Embl:
190         s_AssignTextseq_id(new_id.SetEmbl(), old_id.GetEmbl());
191         break;
192 
193     case CSeq_id::e_Ddbj:
194         s_AssignTextseq_id(new_id.SetDdbj(), old_id.GetDdbj());
195         break;
196 
197     case CSeq_id::e_Gpipe:
198         s_AssignTextseq_id(new_id.SetGpipe(), old_id.GetGpipe());
199         break;
200 
201     case CSeq_id::e_Named_annot_track:
202         s_AssignTextseq_id(new_id.SetNamed_annot_track(), old_id.GetNamed_annot_track());
203         break;
204 
205     default:
206         new_id.Assign(old_id);
207         break;
208     }
209 }
210 
211 
CreateInfo(const CSeq_id & id)212 CSeq_id_Info* CSeq_id_Which_Tree::CreateInfo(const CSeq_id& id)
213 {
214     CRef<CSeq_id> id_ref(new CSeq_id);
215     s_AssignSeq_id(*id_ref, id);
216     return new CSeq_id_Info(id_ref, m_Mapper);
217 }
218 
219 
DropInfo(const CSeq_id_Info * info)220 void CSeq_id_Which_Tree::DropInfo(const CSeq_id_Info* info)
221 {
222     TWriteLockGuard guard(m_TreeLock);
223     if ( info->IsLocked() ) {
224         _ASSERT(info->m_Seq_id_Type != CSeq_id::e_not_set);
225         return;
226     }
227     if ( info->m_Seq_id_Type == CSeq_id::e_not_set ) {
228         _ASSERT(!info->IsLocked());
229         return;
230     }
231     x_Unindex(info);
232     _ASSERT(!info->IsLocked());
233     _ASSERT(info->m_Seq_id_Type != CSeq_id::e_not_set);
234     // ThreadSanitizer may report this as a race since m_Seq_id_Type
235     // may be accessed by other threads without locking the mutex.
236     // This race is safe to suppress since the object is never actually
237     // used after entering DropInfo().
238     const_cast<CSeq_id_Info*>(info)->m_Seq_id_Type = CSeq_id::e_not_set;
239 }
240 
241 
GetGiHandle(TGi)242 CSeq_id_Handle CSeq_id_Which_Tree::GetGiHandle(TGi /*gi*/)
243 {
244     NCBI_THROW(CSeq_id_MapperException, eTypeError, "Invalid seq-id type");
245 }
246 
247 
Initialize(CSeq_id_Mapper * mapper,vector<CRef<CSeq_id_Which_Tree>> & v)248 void CSeq_id_Which_Tree::Initialize(CSeq_id_Mapper* mapper,
249                                     vector<CRef<CSeq_id_Which_Tree> >& v)
250 {
251     NCBI_LSAN_DISABLE_GUARD;
252 
253     v.resize(CSeq_id::e_MaxChoice);
254     v[CSeq_id::e_not_set].Reset(new CSeq_id_not_set_Tree(mapper));
255     v[CSeq_id::e_Local].Reset(new CSeq_id_Local_Tree(mapper));
256     v[CSeq_id::e_Gibbsq].Reset(new CSeq_id_Gibbsq_Tree(mapper));
257     v[CSeq_id::e_Gibbmt].Reset(new CSeq_id_Gibbmt_Tree(mapper));
258     v[CSeq_id::e_Giim].Reset(new CSeq_id_Giim_Tree(mapper));
259     // These three types share the same accessions space
260     CRef<CSeq_id_Which_Tree> gb(new CSeq_id_GB_Tree(mapper));
261     v[CSeq_id::e_Genbank] = gb;
262     v[CSeq_id::e_Embl] = gb;
263     v[CSeq_id::e_Ddbj] = gb;
264     v[CSeq_id::e_Pir].Reset(new CSeq_id_Pir_Tree(mapper));
265     v[CSeq_id::e_Swissprot].Reset(new CSeq_id_Swissprot_Tree(mapper));
266     v[CSeq_id::e_Patent].Reset(new CSeq_id_Patent_Tree(mapper));
267     v[CSeq_id::e_Other].Reset(new CSeq_id_Other_Tree(mapper));
268     v[CSeq_id::e_General].Reset(new CSeq_id_General_Tree(mapper));
269     v[CSeq_id::e_Gi].Reset(new CSeq_id_Gi_Tree(mapper));
270     // see above    v[CSeq_id::e_Ddbj] = gb;
271     v[CSeq_id::e_Prf].Reset(new CSeq_id_Prf_Tree(mapper));
272     v[CSeq_id::e_Pdb].Reset(new CSeq_id_PDB_Tree(mapper));
273     v[CSeq_id::e_Tpg].Reset(new CSeq_id_Tpg_Tree(mapper));
274     v[CSeq_id::e_Tpe].Reset(new CSeq_id_Tpe_Tree(mapper));
275     v[CSeq_id::e_Tpd].Reset(new CSeq_id_Tpd_Tree(mapper));
276     v[CSeq_id::e_Gpipe].Reset(new CSeq_id_Gpipe_Tree(mapper));
277     v[CSeq_id::e_Named_annot_track].Reset(new CSeq_id_Named_annot_track_Tree(mapper));
278 }
279 
280 
// Per-allocation bookkeeping cost assumed by the memory estimates in this
// file: two pointers.
static const size_t kMallocOverhead = 2*sizeof(void*);

// Estimate the memory taken by the character buffer of 's'.
// Returns 0 for a zero-capacity string.  When capacity plus one pointer fits
// into sizeof(string) only the capacity is counted; otherwise one extra
// pointer and kMallocOverhead are added (treated as a separately allocated
// buffer).
static size_t sx_StringMemory(const string& s)
{
    const size_t capacity = s.capacity();
    if ( capacity == 0 ) {
        return 0;
    }
    if ( capacity + sizeof(void*) <= sizeof(string) ) {
        return capacity;
    }
    return capacity + sizeof(void*) + kMallocOverhead;
}
294 
295 
296 /////////////////////////////////////////////////////////////////////////////
297 // CSeq_id_not_set_Tree
298 /////////////////////////////////////////////////////////////////////////////
299 
CSeq_id_not_set_Tree(CSeq_id_Mapper * mapper)300 CSeq_id_not_set_Tree::CSeq_id_not_set_Tree(CSeq_id_Mapper* mapper)
301     : CSeq_id_Which_Tree(mapper)
302 {
303 }
304 
305 
~CSeq_id_not_set_Tree(void)306 CSeq_id_not_set_Tree::~CSeq_id_not_set_Tree(void)
307 {
308 }
309 
310 
Empty(void) const311 bool CSeq_id_not_set_Tree::Empty(void) const
312 {
313     return true;
314 }
315 
316 
317 inline
x_Check(const CSeq_id & id) const318 bool CSeq_id_not_set_Tree::x_Check(const CSeq_id& id) const
319 {
320     return id.Which() == CSeq_id::e_not_set;
321 }
322 
323 
DropInfo(const CSeq_id_Info *)324 void CSeq_id_not_set_Tree::DropInfo(const CSeq_id_Info* /*info*/)
325 {
326 }
327 
328 
x_Unindex(const CSeq_id_Info *)329 void CSeq_id_not_set_Tree::x_Unindex(const CSeq_id_Info* /*info*/)
330 {
331 }
332 
333 
FindInfo(const CSeq_id &) const334 CSeq_id_Handle CSeq_id_not_set_Tree::FindInfo(const CSeq_id& /*id*/) const
335 {
336     return null;
337 }
338 
339 
FindOrCreate(const CSeq_id &)340 CSeq_id_Handle CSeq_id_not_set_Tree::FindOrCreate(const CSeq_id& /*id*/)
341 {
342     return null;
343 }
344 
345 
FindMatch(const CSeq_id_Handle &,TSeq_id_MatchList &) const346 void CSeq_id_not_set_Tree::FindMatch(const CSeq_id_Handle& /*id*/,
347                                      TSeq_id_MatchList& /*id_list*/) const
348 {
349     ERR_POST_X(3, Warning << "CSeq_id_Mapper::GetMatchingHandles() -- "
350                "uninitialized seq-id");
351 }
352 
353 
FindMatchStr(const string &,TSeq_id_MatchList &) const354 void CSeq_id_not_set_Tree::FindMatchStr(const string& /*sid*/,
355                                         TSeq_id_MatchList& /*id_list*/) const
356 {
357 }
358 
359 
FindReverseMatch(const CSeq_id_Handle &,TSeq_id_MatchList &)360 void CSeq_id_not_set_Tree::FindReverseMatch(const CSeq_id_Handle& /*id*/,
361                                             TSeq_id_MatchList& /*id_list*/)
362 {
363     ERR_POST_X(4, Warning << "CSeq_id_Mapper::GetReverseMatchingHandles() -- "
364                "uninitialized seq-id");
365 }
366 
367 
Dump(CNcbiOstream & out,CSeq_id::E_Choice type,int details) const368 size_t CSeq_id_not_set_Tree::Dump(CNcbiOstream& out,
369                                   CSeq_id::E_Choice type,
370                                   int details) const
371 {
372     if ( details >= CSeq_id_Mapper::eDumpStatistics ) {
373         out << "CSeq_id_Handles("<<CSeq_id::SelectionName(type)<<"): ";
374         out << "virtual, no memory" << endl;
375     }
376     return 0;
377 }
378 
379 
380 /////////////////////////////////////////////////////////////////////////////
381 // CSeq_id_int_Tree
382 /////////////////////////////////////////////////////////////////////////////
383 
384 
CSeq_id_int_Tree(CSeq_id_Mapper * mapper)385 CSeq_id_int_Tree::CSeq_id_int_Tree(CSeq_id_Mapper* mapper)
386     : CSeq_id_Which_Tree(mapper)
387 {
388 }
389 
390 
~CSeq_id_int_Tree(void)391 CSeq_id_int_Tree::~CSeq_id_int_Tree(void)
392 {
393 }
394 
395 
Empty(void) const396 bool CSeq_id_int_Tree::Empty(void) const
397 {
398     return m_IntMap.empty();
399 }
400 
401 
FindInfo(const CSeq_id & id) const402 CSeq_id_Handle CSeq_id_int_Tree::FindInfo(const CSeq_id& id) const
403 {
404     _ASSERT(x_Check(id));
405     TPacked value = x_Get(id);
406 
407     TReadLockGuard guard(m_TreeLock);
408     TIntMap::const_iterator it = m_IntMap.find(value);
409     if (it != m_IntMap.end()) {
410         return CSeq_id_Handle(it->second);
411     }
412     return null;
413 }
414 
415 
FindOrCreate(const CSeq_id & id)416 CSeq_id_Handle CSeq_id_int_Tree::FindOrCreate(const CSeq_id& id)
417 {
418     _ASSERT(x_Check(id));
419     TPacked value = x_Get(id);
420 
421     TWriteLockGuard guard(m_TreeLock);
422     pair<TIntMap::iterator, bool> ins =
423         m_IntMap.insert(TIntMap::value_type(value, nullptr));
424     if ( ins.second ) {
425         ins.first->second = CreateInfo(id);
426     }
427     return CSeq_id_Handle(ins.first->second);
428 }
429 
430 
x_Unindex(const CSeq_id_Info * info)431 void CSeq_id_int_Tree::x_Unindex(const CSeq_id_Info* info)
432 {
433     _ASSERT(x_Check(*info->GetSeqId()));
434     TPacked value = x_Get(*info->GetSeqId());
435 
436     _VERIFY(m_IntMap.erase(value));
437 }
438 
439 
FindMatchStr(const string & sid,TSeq_id_MatchList & id_list) const440 void CSeq_id_int_Tree::FindMatchStr(const string& sid,
441                                     TSeq_id_MatchList& id_list) const
442 {
443     TPacked value;
444     try {
445         value = NStr::StringToNumeric<TPacked>(sid);
446     }
447     catch (const CStringException& /*ignored*/) {
448         // Not an integer value
449         return;
450     }
451     TReadLockGuard guard(m_TreeLock);
452     TIntMap::const_iterator it = m_IntMap.find(value);
453     if (it != m_IntMap.end()) {
454         id_list.insert(CSeq_id_Handle(it->second));
455     }
456 }
457 
458 
Dump(CNcbiOstream & out,CSeq_id::E_Choice type,int details) const459 size_t CSeq_id_int_Tree::Dump(CNcbiOstream& out,
460                               CSeq_id::E_Choice type,
461                               int details) const
462 {
463     size_t total_bytes = 0;
464     if ( details >= CSeq_id_Mapper::eDumpStatistics ) {
465         out << "CSeq_id_Handles("<<CSeq_id::SelectionName(type)<<"): ";
466     }
467     size_t count = m_IntMap.size(), elem_size = 0, extra_size = 0;
468     if ( count ) {
469         elem_size = sizeof(int)+sizeof(void*); // map value
470         elem_size += sizeof(int)+3*sizeof(void*); // red/black tree overhead
471         elem_size += sizeof(CSeq_id_Info); //
472         elem_size += sizeof(CSeq_id); //
473         // malloc overhead:
474         // map value, CSeq_id_Info, CSeq_id
475         elem_size += 3*kMallocOverhead;
476     }
477     size_t bytes = count*elem_size+extra_size;
478     total_bytes += bytes;
479     if ( details >= CSeq_id_Mapper::eDumpStatistics ) {
480         out << count << " handles, "<<bytes<<" bytes" << endl;
481     }
482     if ( details >= CSeq_id_Mapper::eDumpAllIds ) {
483         ITERATE ( TIntMap, it, m_IntMap ) {
484             out << "  " << it->second->GetSeqId()->AsFastaString() << endl;
485         }
486     }
487     return total_bytes;
488 }
489 
490 /////////////////////////////////////////////////////////////////////////////
491 // CSeq_id_Gibbsq_Tree
492 /////////////////////////////////////////////////////////////////////////////
493 
CSeq_id_Gibbsq_Tree(CSeq_id_Mapper * mapper)494 CSeq_id_Gibbsq_Tree::CSeq_id_Gibbsq_Tree(CSeq_id_Mapper* mapper)
495     : CSeq_id_int_Tree(mapper)
496 {
497 }
498 
499 
x_Check(const CSeq_id & id) const500 bool CSeq_id_Gibbsq_Tree::x_Check(const CSeq_id& id) const
501 {
502     return id.IsGibbsq();
503 }
504 
505 
x_Get(const CSeq_id & id) const506 CSeq_id_Gibbsq_Tree::TPacked CSeq_id_Gibbsq_Tree::x_Get(const CSeq_id& id) const
507 {
508     return INT_ID_FROM(CSeq_id::TGibbsq, id.GetGibbsq());
509 }
510 
511 
512 /////////////////////////////////////////////////////////////////////////////
513 // CSeq_id_Gibbmt_Tree
514 /////////////////////////////////////////////////////////////////////////////
515 
CSeq_id_Gibbmt_Tree(CSeq_id_Mapper * mapper)516 CSeq_id_Gibbmt_Tree::CSeq_id_Gibbmt_Tree(CSeq_id_Mapper* mapper)
517     : CSeq_id_int_Tree(mapper)
518 {
519 }
520 
521 
x_Check(const CSeq_id & id) const522 bool CSeq_id_Gibbmt_Tree::x_Check(const CSeq_id& id) const
523 {
524     return id.IsGibbmt();
525 }
526 
527 
x_Get(const CSeq_id & id) const528 CSeq_id_Gibbmt_Tree::TPacked CSeq_id_Gibbmt_Tree::x_Get(const CSeq_id& id) const
529 {
530     return INT_ID_FROM(CSeq_id::TGibbmt, id.GetGibbmt());
531 }
532 
533 
534 /////////////////////////////////////////////////////////////////////////////
535 // CSeq_id_Gi_Tree
536 /////////////////////////////////////////////////////////////////////////////
537 
538 
CSeq_id_Gi_Info(CSeq_id_Mapper * mapper)539 CSeq_id_Gi_Info::CSeq_id_Gi_Info(CSeq_id_Mapper* mapper)
540     : CSeq_id_Info(CSeq_id::e_Gi, mapper)
541 {
542 }
543 
544 
GetPackedSeqId(TPacked gi,TVariant) const545 CConstRef<CSeq_id> CSeq_id_Gi_Info::GetPackedSeqId(TPacked gi, TVariant /*variant*/) const
546 {
547     CConstRef<CSeq_id> ret;
548     typedef CSeq_id_Gi_Info TThis;
549 #if defined NCBI_SLOW_ATOMIC_SWAP
550     CFastMutexGuard guard(sx_GetSeqIdMutex);
551     ret = m_Seq_id;
552     const_cast<TThis*>(this)->m_Seq_id.Reset();
553     if ( !ret || !ret->ReferencedOnlyOnce() ) {
554         ret.Reset(new CSeq_id);
555     }
556     const_cast<TThis*>(this)->m_Seq_id = ret;
557 #else
558     const_cast<TThis*>(this)->m_Seq_id.AtomicReleaseTo(ret);
559     if ( !ret || !ret->ReferencedOnlyOnce() ) {
560         ret.Reset(new CSeq_id);
561     }
562     const_cast<TThis*>(this)->m_Seq_id.AtomicResetFrom(ret);
563 #endif
564     const_cast<CSeq_id&>(*ret).SetGi(GI_FROM(TPacked, gi));
565     return ret;
566 }
567 
568 
CSeq_id_Gi_Tree(CSeq_id_Mapper * mapper)569 CSeq_id_Gi_Tree::CSeq_id_Gi_Tree(CSeq_id_Mapper* mapper)
570     : CSeq_id_Which_Tree(mapper),
571       m_ZeroInfo(0),
572       m_SharedInfo(0)
573 {
574 }
575 
576 
~CSeq_id_Gi_Tree(void)577 CSeq_id_Gi_Tree::~CSeq_id_Gi_Tree(void)
578 {
579 }
580 
581 
Empty(void) const582 bool CSeq_id_Gi_Tree::Empty(void) const
583 {
584     return true;
585 }
586 
587 
588 inline
x_Check(const CSeq_id & id) const589 bool CSeq_id_Gi_Tree::x_Check(const CSeq_id& id) const
590 {
591     return id.IsGi();
592 }
593 
594 
595 inline
x_Get(const CSeq_id & id) const596 TGi CSeq_id_Gi_Tree::x_Get(const CSeq_id& id) const
597 {
598     return id.GetGi();
599 }
600 
601 
x_Unindex(const CSeq_id_Info * info)602 void CSeq_id_Gi_Tree::x_Unindex(const CSeq_id_Info* info)
603 {
604     if ( info == m_SharedInfo ) {
605         m_SharedInfo = 0;
606     }
607     else if ( info == m_ZeroInfo ) {
608         m_ZeroInfo = 0;
609     }
610 }
611 
612 
GetGiHandle(TGi gi)613 CSeq_id_Handle CSeq_id_Gi_Tree::GetGiHandle(TGi gi)
614 {
615     if ( gi != ZERO_GI ) {
616         TWriteLockGuard guard(m_TreeLock);
617         if ( !m_SharedInfo ) {
618             m_SharedInfo = new CSeq_id_Gi_Info(m_Mapper);
619         }
620         return CSeq_id_Handle(m_SharedInfo, GI_TO(TPacked, gi));
621     }
622     else {
623         TWriteLockGuard guard(m_TreeLock);
624         if ( !m_ZeroInfo ) {
625             CRef<CSeq_id> zero_id(new CSeq_id);
626             zero_id->SetGi(ZERO_GI);
627             m_ZeroInfo = CreateInfo(*zero_id);
628         }
629         return CSeq_id_Handle(m_ZeroInfo);
630     }
631 }
632 
633 
FindInfo(const CSeq_id & id) const634 CSeq_id_Handle CSeq_id_Gi_Tree::FindInfo(const CSeq_id& id) const
635 {
636     CSeq_id_Handle ret;
637     _ASSERT(x_Check(id));
638     TPacked gi = GI_TO(TPacked, x_Get(id));
639     TReadLockGuard guard(m_TreeLock);
640     if ( gi ) {
641         if ( m_SharedInfo ) {
642             ret = CSeq_id_Handle(m_SharedInfo, gi);
643         }
644     }
645     else if ( m_ZeroInfo ) {
646         ret = CSeq_id_Handle(m_ZeroInfo);
647     }
648     return ret;
649 }
650 
651 
FindOrCreate(const CSeq_id & id)652 CSeq_id_Handle CSeq_id_Gi_Tree::FindOrCreate(const CSeq_id& id)
653 {
654     _ASSERT(x_Check(id));
655     return GetGiHandle(x_Get(id));
656 }
657 
658 
FindMatchStr(const string & sid,TSeq_id_MatchList & id_list) const659 void CSeq_id_Gi_Tree::FindMatchStr(const string& sid,
660                                    TSeq_id_MatchList& id_list) const
661 {
662     TPacked gi;
663     try {
664         gi = NStr::StringToNumeric<TPacked>(sid);
665     }
666     catch (const CStringException& /*ignored*/) {
667         // Not an integer value
668         return;
669     }
670     if (gi) {
671         id_list.insert(CSeq_id_Handle(m_SharedInfo, gi));
672     }
673     else if ( m_ZeroInfo ) {
674         id_list.insert(CSeq_id_Handle(m_ZeroInfo));
675     }
676 }
677 
678 
Dump(CNcbiOstream & out,CSeq_id::E_Choice type,int details) const679 size_t CSeq_id_Gi_Tree::Dump(CNcbiOstream& out,
680                              CSeq_id::E_Choice type,
681                              int details) const
682 {
683     if ( details >= CSeq_id_Mapper::eDumpStatistics ) {
684         out << "CSeq_id_Handles("<<CSeq_id::SelectionName(type)<<"): ";
685         out << "virtual, small constant memory";
686         out << endl;
687     }
688     return 0;
689 }
690 
691 /////////////////////////////////////////////////////////////////////////////
692 // CSeq_id_Textseq_Tree
693 /////////////////////////////////////////////////////////////////////////////
694 
695 
696 NCBI_PARAM_DECL(bool, OBJECTS, PACK_TEXTID);
697 NCBI_PARAM_DEF_EX(bool, OBJECTS, PACK_TEXTID, true,
698                   eParam_NoThread, OBJECTS_PACK_TEXTID);
s_PackTextidEnabled(void)699 static inline bool s_PackTextidEnabled(void)
700 {
701     static CSafeStatic<NCBI_PARAM_TYPE(OBJECTS, PACK_TEXTID)> value;
702     return value->Get();
703 }
704 
705 NCBI_PARAM_DECL(bool, OBJECTS, PACK_GENERAL);
706 NCBI_PARAM_DEF_EX(bool, OBJECTS, PACK_GENERAL, true,
707                   eParam_NoThread, OBJECTS_PACK_GENERAL);
s_PackGeneralEnabled(void)708 static inline bool s_PackGeneralEnabled(void)
709 {
710     static CSafeStatic<NCBI_PARAM_TYPE(OBJECTS, PACK_GENERAL)> value;
711     return value->Get();
712 }
713 
714 static inline
s_RestoreNumber(string & str,size_t pos,size_t len,TIntId number)715 void s_RestoreNumber(string& str, size_t pos, size_t len, TIntId number)
716 {
717     char* start = &str[pos];
718     char* ptr = start + len;
719     while ( number ) {
720         *--ptr = (char)('0' + number % 10);
721         number /= 10;
722     }
723     while ( ptr > start ) {
724         *--ptr = '0';
725     }
726 }
727 
728 static inline
s_ParseNumber(const string & str,size_t pos,size_t len)729 TIntId s_ParseNumber(const string& str, size_t pos, size_t len)
730 {
731     TIntId number = 0;
732     for ( size_t i = pos; i < pos+len; ++i ) {
733         number = number * 10 + (str[i]-'0');
734     }
735     return number;
736 }
737 
738 
739 static inline
s_RestoreCaseVariant(string & str,size_t len,CSeq_id_Handle::TVariant variant)740 CSeq_id_Handle::TVariant s_RestoreCaseVariant(string& str, size_t len,
741                                               CSeq_id_Handle::TVariant variant)
742 {
743     for ( size_t i = 0; variant && i != len; ++i ) {
744         int c = Uint1(str[i]);
745         if ( isalpha(c) ) {
746             if ( variant & 1 ) {
747                 // flip case
748                 if ( islower(c) ) {
749                     c = toupper(c);
750                 }
751                 else {
752                     c = tolower(c);
753                 }
754                 str[i] = c;
755             }
756             variant >>= 1;
757         }
758     }
759     return variant;
760 }
761 
762 
763 static inline
s_RestoreCaseVariant(string & str,CSeq_id_Handle::TVariant variant)764 CSeq_id_Handle::TVariant s_RestoreCaseVariant(string& str, CSeq_id_Handle::TVariant variant)
765 {
766     return s_RestoreCaseVariant(str, str.size(), variant);
767 }
768 
769 
770 static inline
771 pair<CSeq_id_Handle::TVariant, CSeq_id_Handle::TVariant>
s_ParseCaseVariant(CTempString ref,const char * str,CSeq_id_Handle::TVariant bit)772 s_ParseCaseVariant(CTempString ref, const char* str,
773                    CSeq_id_Handle::TVariant bit)
774 {
775     CSeq_id_Handle::TVariant variant = 0;
776     for ( size_t i = 0; bit && i != ref.size(); ++i ) {
777         int cr = Uint1(ref[i]);
778         if ( !isalpha(cr) ) {
779             continue;
780         }
781         int cs = Uint1(str[i]);
782         if ( cs != cr ) {
783             _ASSERT((isupper(cs) && tolower(cs) == cr) ||
784                     (islower(cs) && toupper(cs) == cr));
785             variant |= bit;
786         }
787         bit <<= 1;
788     }
789     return make_pair(variant, bit);
790 }
791 
792 
793 static inline
794 pair<CSeq_id_Handle::TVariant, CSeq_id_Handle::TVariant>
s_ParseCaseVariant(CTempString ref,const string & str,CSeq_id_Handle::TVariant bit=1)795 s_ParseCaseVariant(CTempString ref, const string& str,
796                    CSeq_id_Handle::TVariant bit = 1)
797 {
798     _ASSERT(ref.size() <= str.size());
799     return s_ParseCaseVariant(ref, str.data(), bit);
800 }
801 
802 
803 static inline
s_RestoreNumberAndCaseVariant(string & str,size_t pos,size_t len,TIntId number,CSeq_id_Handle::TVariant variant)804 void s_RestoreNumberAndCaseVariant(string& str, size_t pos, size_t len, TIntId number,
805                                    CSeq_id_Handle::TVariant variant)
806 {
807     s_RestoreNumber(str, pos, len, number);
808     if ( variant ) {
809         s_RestoreCaseVariant(str, pos, variant);
810     }
811 }
812 
813 
CSeq_id_Textseq_Info(CSeq_id::E_Choice type,CSeq_id_Mapper * mapper,const TKey & key)814 CSeq_id_Textseq_Info::CSeq_id_Textseq_Info(CSeq_id::E_Choice type,
815                                            CSeq_id_Mapper* mapper,
816                                            const TKey& key)
817     : CSeq_id_Info(type, mapper),
818       m_Key(key)
819 {
820 }
821 
822 
~CSeq_id_Textseq_Info(void)823 CSeq_id_Textseq_Info::~CSeq_id_Textseq_Info(void)
824 {
825 }
826 
827 
828 CSeq_id_Textseq_Info::TKey
ParseAcc(const string & acc,const TVersion * ver)829 CSeq_id_Textseq_Info::ParseAcc(const string& acc,
830                                const TVersion* ver)
831 {
832     TKey key;
833     size_t len = acc.size(), prefix_len = len, most_significant = NPOS;
834     while ( prefix_len ) {
835         char c = acc[--prefix_len];
836         if ( c >= '1' && c <= '9' ) {
837             most_significant = prefix_len;
838         }
839         else if ( c != '0' ) {
840             ++prefix_len;
841             break;
842         }
843     }
844     if ( most_significant == NPOS ) {
845         return key;
846     }
847     size_t acc_digits = len - prefix_len, real_digits = len - most_significant;
848     if ( acc_digits < 2 || acc_digits > 12 ||
849          real_digits > 9 || acc_digits*2 < prefix_len ) {
850         return key;
851     }
852     if ( prefix_len <= 4 ) {
853         // good
854     }
855     else if ( prefix_len == 3 ) {
856         if ( (acc[0] != 'N' && acc[0] != 'Y') ||
857              (acc[1] != 'P' && acc[1] != 'C') ||
858              (acc[2] != '_') ) {
859             return key;
860         }
861     }
862     else {
863         return key;
864     }
865     if ( acc_digits > 6 && real_digits < acc_digits ) {
866         acc_digits = max(size_t(6), real_digits);
867         prefix_len = len - acc_digits;
868     }
869     if ( prefix_len > key.kMaxPrefixLen ) {
870         return key;
871     }
872     key.m_PrefixLen = prefix_len;
873     memcpy(key.m_PrefixBuf, acc.data(), prefix_len);
874     unsigned hash = 0;
875     for ( size_t i = 0; i < 3 && i < prefix_len; ++i ) {
876         hash = (hash << 8) | toupper(key.m_PrefixBuf[i] & 0xff);
877     }
878     hash = (hash << 8) | unsigned(acc_digits << 1);
879     key.m_Hash = hash;
880     if ( ver ) {
881         key.SetVersion(*ver);
882     }
883     return key;
884 }
885 
886 
RestoreAccession(string & acc,TPacked param,TVariant variant) const887 void CSeq_id_Textseq_Info::RestoreAccession(string& acc, TPacked param, TVariant variant) const
888 {
889     acc = GetAccPrefix();
890     acc.resize(acc.size() + GetAccDigits(), '0');
891     s_RestoreNumberAndCaseVariant(acc, GetAccPrefix().size(), GetAccDigits(), param, variant);
892 }
893 
894 
Restore(CTextseq_id & id,TPacked param,TVariant variant) const895 void CSeq_id_Textseq_Info::Restore(CTextseq_id& id, TPacked param, TVariant variant) const
896 {
897     if ( !id.IsSetAccession() ) {
898         id.SetAccession(GetAccPrefix());
899         string& acc = id.SetAccession();
900         acc.resize(acc.size() + GetAccDigits(), '0');
901         if ( IsSetVersion() ) {
902             id.SetVersion(GetVersion());
903         }
904     }
905     s_RestoreNumberAndCaseVariant(id.SetAccession(),
906                                   GetAccPrefix().size(), GetAccDigits(), param, variant);
907 }
908 
909 
910 inline
911 CSeq_id_Textseq_Info::TPacked
Pack(const TKey & key,const string & acc)912 CSeq_id_Textseq_Info::Pack(const TKey& key, const string& acc)
913 {
914     return s_ParseNumber(acc, key.GetPrefixLen(), key.GetAccDigits());
915 }
916 
917 
918 inline
919 CSeq_id_Textseq_Info::TPacked
Pack(const TKey & key,const CTextseq_id & tid)920 CSeq_id_Textseq_Info::Pack(const TKey& key, const CTextseq_id& tid)
921 {
922     return Pack(key, tid.GetAccession());
923 }
924 
925 
926 inline
927 CSeq_id_Info::TVariant
ParseCaseVariant(const CSeq_id_Info * info,const string & acc)928 CSeq_id_Textseq_Info::ParseCaseVariant(const CSeq_id_Info* info, const string& acc)
929 {
930     return s_ParseCaseVariant(info->GetSeqId()->GetTextseq_Id()->GetAccession(), acc).first;
931 }
932 
933 
934 inline
935 CSeq_id_Info::TVariant
ParseCaseVariant(const string & acc) const936 CSeq_id_Textseq_Info::TKey::ParseCaseVariant(const string& acc) const
937 {
938     return s_ParseCaseVariant(GetAccPrefix(), acc).first;
939 }
940 
941 
// Build the CSeq_id that corresponds to packed value 'param' and case
// variant 'variant' for this packed info.
//
// For a non-zero variant a brand new CSeq_id is always allocated.  For
// variant 0 the function tries to recycle the CSeq_id cached in m_Seq_id:
// the cached object is taken out of the member, replaced by a new one if it
// is missing or still referenced elsewhere, and then stored back into the
// member.  In either case the object is switched to this info's Seq-id type
// and filled in by Restore().
//
// The method is const, so the cache member is updated through const_cast.
CConstRef<CSeq_id> CSeq_id_Textseq_Info::GetPackedSeqId(TPacked param, TVariant variant) const
{
    CConstRef<CSeq_id> ret;
    typedef CSeq_id_Textseq_Info TThis;
    if ( variant ) {
        // all non-initial case variants need fresh Seq-id to start with
        ret = new CSeq_id;
    }
    else {
        // otherwise try to use shared Seq-id if it's not referenced anywhere else
#if defined NCBI_SLOW_ATOMIC_SWAP
        // mutex-protected variant of the take / check / put-back sequence
        CFastMutexGuard guard(sx_GetSeqIdMutex);
        ret = m_Seq_id;
        const_cast<TThis*>(this)->m_Seq_id.Reset();
        if ( !ret || !ret->ReferencedOnlyOnce() ) {
            // nothing cached, or the cached object is shared: do not touch it
            ret.Reset(new CSeq_id);
        }
        const_cast<TThis*>(this)->m_Seq_id = ret;
#else
        // NOTE(review): AtomicReleaseTo()/AtomicResetFrom() presumably perform
        // the same take-out / put-back as the mutex branch above, but
        // atomically - confirm against the CRef/CConstRef documentation.
        const_cast<TThis*>(this)->m_Seq_id.AtomicReleaseTo(ret);
        if ( !ret || !ret->ReferencedOnlyOnce() ) {
            // nothing cached, or the cached object is shared: do not touch it
            ret.Reset(new CSeq_id);
        }
        const_cast<TThis*>(this)->m_Seq_id.AtomicResetFrom(ret);
#endif
    }
    // select this info's Seq-id type (eDoNotResetVariant is passed, presumably
    // to keep already selected content) and restore accession/version in place
    const_cast<CSeq_id&>(*ret).Select(GetType(), eDoNotResetVariant);
    Restore(*const_cast<CTextseq_id*>(ret->GetTextseq_Id()), param, variant);
    return ret;
}
973 
974 
CSeq_id_Textseq_PlainInfo(const CConstRef<CSeq_id> & seq_id,CSeq_id_Mapper * mapper)975 CSeq_id_Textseq_PlainInfo::CSeq_id_Textseq_PlainInfo(const CConstRef<CSeq_id>& seq_id,
976                                                      CSeq_id_Mapper* mapper)
977     : CSeq_id_Info(seq_id, mapper)
978 {
979 }
980 
981 
982 inline
983 CSeq_id_Info::TVariant
ParseCaseVariant(const string & acc) const984 CSeq_id_Textseq_PlainInfo::ParseCaseVariant(const string& acc) const
985 {
986     return s_ParseCaseVariant(m_Seq_id->GetTextseq_Id()->GetAccession(), acc).first;
987 }
988 
989 
990 inline
991 CSeq_id_Info::TVariant
ParseCaseVariant(const CTextseq_id & id) const992 CSeq_id_Textseq_PlainInfo::ParseCaseVariant(const CTextseq_id& id) const
993 {
994     if ( !id.IsSetAccession() ) {
995         return 0;
996     }
997     return s_ParseCaseVariant(m_Seq_id->GetTextseq_Id()->GetAccession(), id.GetAccession()).first;
998 }
999 
1000 
GetPackedSeqId(TPacked packed,TVariant variant) const1001 CConstRef<CSeq_id> CSeq_id_Textseq_PlainInfo::GetPackedSeqId(TPacked packed, TVariant variant) const
1002 {
1003     _ASSERT(!packed);
1004     _ASSERT(variant);
1005     CRef<CSeq_id> ret(new CSeq_id);
1006     s_AssignSeq_id(*ret, *m_Seq_id);
1007     s_RestoreCaseVariant(const_cast<CTextseq_id*>(ret->GetTextseq_Id())->SetAccession(), variant);
1008     return ret;
1009 }
1010 
1011 
CSeq_id_Textseq_Tree(CSeq_id_Mapper * mapper,CSeq_id::E_Choice type)1012 CSeq_id_Textseq_Tree::CSeq_id_Textseq_Tree(CSeq_id_Mapper* mapper,
1013                                            CSeq_id::E_Choice type)
1014     : CSeq_id_Which_Tree(mapper),
1015       m_Type(type)
1016 {
1017 }
1018 
1019 
~CSeq_id_Textseq_Tree(void)1020 CSeq_id_Textseq_Tree::~CSeq_id_Textseq_Tree(void)
1021 {
1022 }
1023 
1024 
x_Check(const CSeq_id::E_Choice & type) const1025 bool CSeq_id_Textseq_Tree::x_Check(const CSeq_id::E_Choice& type) const
1026 {
1027     return type == m_Type;
1028 }
1029 
1030 
x_Check(const CSeq_id & id) const1031 bool CSeq_id_Textseq_Tree::x_Check(const CSeq_id& id) const
1032 {
1033     return x_Check(id.Which());
1034 }
1035 
1036 
Empty(void) const1037 bool CSeq_id_Textseq_Tree::Empty(void) const
1038 {
1039     return m_ByName.empty() && m_ByAcc.empty() && m_PackedMap.empty();
1040 }
1041 
1042 
x_Equals(const CTextseq_id & id1,const CTextseq_id & id2)1043 bool CSeq_id_Textseq_Tree::x_Equals(const CTextseq_id& id1,
1044                                     const CTextseq_id& id2)
1045 {
1046     if ( id1.IsSetAccession() != id2.IsSetAccession() ) {
1047         return false;
1048     }
1049     if ( id1.IsSetName() != id2.IsSetName() ) {
1050         return false;
1051     }
1052     if ( id1.IsSetVersion() != id2.IsSetVersion() ) {
1053         return false;
1054     }
1055     if ( id1.IsSetRelease() != id2.IsSetRelease() ) {
1056         return false;
1057     }
1058     if ( id1.IsSetAccession() &&
1059          !NStr::EqualNocase(id1.GetAccession(), id2.GetAccession()) ) {
1060         return false;
1061     }
1062     if ( id1.IsSetName() &&
1063          !NStr::EqualNocase(id1.GetName(), id2.GetName()) ) {
1064         return false;
1065     }
1066     if ( id1.IsSetVersion() &&
1067          id1.GetVersion() != id2.GetVersion() ) {
1068         return false;
1069     }
1070     if ( id1.IsSetRelease() &&
1071          id1.GetRelease() != id2.GetRelease() ) {
1072         return false;
1073     }
1074     return true;
1075 }
1076 
1077 
1078 CSeq_id_Textseq_PlainInfo*
x_FindStrInfo(const TStringMap & str_map,const string & str,CSeq_id::E_Choice type,const CTextseq_id & tid) const1079 CSeq_id_Textseq_Tree::x_FindStrInfo(const TStringMap& str_map,
1080                                     const string& str,
1081                                     CSeq_id::E_Choice type,
1082                                     const CTextseq_id& tid) const
1083 {
1084     for ( TStringMapCI vit = str_map.find(str);
1085           vit != str_map.end() && NStr::EqualNocase(vit->first, str);
1086           ++vit ) {
1087         CConstRef<CSeq_id> id = vit->second->GetSeqId();
1088         if ( id->Which() == type && x_Equals(tid, x_Get(*id)) ) {
1089             return vit->second;
1090         }
1091     }
1092     return 0;
1093 }
1094 
1095 
1096 inline
1097 CSeq_id_Textseq_PlainInfo*
x_FindStrInfo(CSeq_id::E_Choice type,const CTextseq_id & tid) const1098 CSeq_id_Textseq_Tree::x_FindStrInfo(CSeq_id::E_Choice type,
1099                                     const CTextseq_id& tid) const
1100 {
1101     if ( tid.IsSetAccession() ) {
1102         return x_FindStrInfo(m_ByAcc, tid.GetAccession(), type, tid);
1103     }
1104     else if ( tid.IsSetName() ) {
1105         return x_FindStrInfo(m_ByName, tid.GetName(), type, tid);
1106     }
1107     else {
1108         return 0;
1109     }
1110 }
1111 
1112 
FindInfo(const CSeq_id & id) const1113 CSeq_id_Handle CSeq_id_Textseq_Tree::FindInfo(const CSeq_id& id) const
1114 {
1115     // Note: if a record is found by accession, no name is checked
1116     // even if it is also set.
1117     _ASSERT(x_Check(id));
1118     const CTextseq_id& tid = x_Get(id);
1119     // Can not compare if no accession given
1120     if ( s_PackTextidEnabled() &&
1121          tid.IsSetAccession() && !tid.IsSetName() && !tid.IsSetRelease() ) {
1122         const string& acc = tid.GetAccession();
1123         TPackedKey key = CSeq_id_Textseq_Info::ParseAcc(acc, tid);
1124         if ( key ) {
1125             TPacked packed = CSeq_id_Textseq_Info::Pack(key, tid);
1126             TReadLockGuard guard(m_TreeLock);
1127             TPackedMap_CI it = m_PackedMap.find(key);
1128             if ( it == m_PackedMap.end() ) {
1129                 return null;
1130             }
1131             return CSeq_id_Handle(it->second, packed, it->first.ParseCaseVariant(acc));
1132         }
1133     }
1134     TReadLockGuard guard(m_TreeLock);
1135     CSeq_id_Textseq_PlainInfo* info = x_FindStrInfo(id.Which(), tid);
1136     CSeq_id_Handle::TVariant variant = info? info->ParseCaseVariant(tid): 0;
1137     return CSeq_id_Handle(info, 0, variant);
1138 }
1139 
FindOrCreate(const CSeq_id & id)1140 CSeq_id_Handle CSeq_id_Textseq_Tree::FindOrCreate(const CSeq_id& id)
1141 {
1142     _ASSERT(x_Check(id));
1143     const CTextseq_id& tid = x_Get(id);
1144     if ( s_PackTextidEnabled() &&
1145          tid.IsSetAccession() && !tid.IsSetName() && !tid.IsSetRelease() ) {
1146         const string& acc = tid.GetAccession();
1147         TPackedKey key = CSeq_id_Textseq_Info::ParseAcc(acc, tid);
1148         if ( key ) {
1149             TPacked packed = CSeq_id_Textseq_Info::Pack(key, tid);
1150             CSeq_id_Handle::TVariant variant = 0;
1151             TWriteLockGuard guard(m_TreeLock);
1152             TPackedMap_I it = m_PackedMap.lower_bound(key);
1153             if ( it == m_PackedMap.end() ||
1154                  m_PackedMap.key_comp()(key, it->first) ) {
1155                 CConstRef<CSeq_id_Textseq_Info> info
1156                     (new CSeq_id_Textseq_Info(id.Which(), m_Mapper, key));
1157                 it = m_PackedMap.insert(it, TPackedMapValue(key, info));
1158             }
1159             else {
1160                 variant = it->first.ParseCaseVariant(acc);
1161             }
1162             return CSeq_id_Handle(it->second, packed, variant);
1163         }
1164     }
1165     TWriteLockGuard guard(m_TreeLock);
1166     CSeq_id_Textseq_PlainInfo* info = x_FindStrInfo(id.Which(), tid);
1167     CSeq_id_Handle::TVariant variant = 0;
1168     if ( !info ) {
1169         CRef<CSeq_id> ref_id(new CSeq_id);
1170         s_AssignSeq_id(*ref_id, id);
1171         info = new CSeq_id_Textseq_PlainInfo(ref_id, m_Mapper);
1172         if ( tid.IsSetAccession() ) {
1173             m_ByAcc.insert(TStringMapValue(tid.GetAccession(), info));
1174         }
1175         if ( tid.IsSetName() ) {
1176             m_ByName.insert(TStringMapValue(tid.GetName(), info));
1177         }
1178     }
1179     else {
1180         variant = info->ParseCaseVariant(tid);
1181     }
1182     return CSeq_id_Handle(info, 0, variant);
1183 }
1184 
1185 
x_Erase(TStringMap & str_map,const string & key,const CSeq_id_Info * info)1186 void CSeq_id_Textseq_Tree::x_Erase(TStringMap& str_map,
1187                                    const string& key,
1188                                    const CSeq_id_Info* info)
1189 {
1190     for ( TStringMap::iterator it = str_map.find(key);
1191           it != str_map.end() && NStr::EqualNocase(it->first, key);
1192           ++it ) {
1193         if ( it->second == info ) {
1194             str_map.erase(it);
1195             return;
1196         }
1197     }
1198 }
1199 
1200 
x_Unindex(const CSeq_id_Info * info)1201 void CSeq_id_Textseq_Tree::x_Unindex(const CSeq_id_Info* info)
1202 {
1203     if ( !m_PackedMap.empty() ) {
1204         const CSeq_id_Textseq_Info* sinfo =
1205             dynamic_cast<const CSeq_id_Textseq_Info*>(info);
1206         if ( sinfo ) {
1207             m_PackedMap.erase(sinfo->GetKey());
1208             return;
1209         }
1210     }
1211     CConstRef<CSeq_id> tid_id = info->GetSeqId();
1212     _ASSERT(x_Check(*tid_id));
1213     const CTextseq_id& tid = x_Get(*tid_id);
1214     if ( tid.IsSetAccession() ) {
1215         x_Erase(m_ByAcc, tid.GetAccession(), info);
1216     }
1217     if ( tid.IsSetName() ) {
1218         x_Erase(m_ByName, tid.GetName(), info);
1219     }
1220 }
1221 
1222 
// True for the two release strings "reviewed" and "unreviewed"
// (exact, case-sensitive comparison).
static inline
bool x_IsDefaultSwissprotRelease(const string& release)
{
    if ( release == "reviewed" ) {
        return true;
    }
    return release == "unreviewed";
}
1228 
1229 
x_FindMatchByAcc(TSeq_id_MatchList & id_list,const string & acc,const TVersion * ver) const1230 void CSeq_id_Textseq_Tree::x_FindMatchByAcc(TSeq_id_MatchList& id_list,
1231                                             const string& acc,
1232                                             const TVersion* ver) const
1233 {
1234     if ( !m_PackedMap.empty() ) {
1235         if ( TPackedKey key = CSeq_id_Textseq_Info::ParseAcc(acc, ver) ) {
1236             if ( key.IsSetVersion() ) {
1237                 // only same version
1238                 TPackedMap_CI it = m_PackedMap.find(key);
1239                 if ( it != m_PackedMap.end() ) {
1240                     TPacked packed = CSeq_id_Textseq_Info::Pack(key, acc);
1241                     id_list.insert(CSeq_id_Handle(it->second, packed));
1242                 }
1243             }
1244             else {
1245                 // all versions
1246                 TPacked packed = 0;
1247                 for ( TPackedMap_CI it = m_PackedMap.lower_bound(key);
1248                       it != m_PackedMap.end() && it->first.SameHashNoVer(key);
1249                       ++it ) {
1250                     if ( it->first.EqualAcc(key) ) {
1251                         if ( packed == 0 ) {
1252                             packed = CSeq_id_Textseq_Info::Pack(key, acc);
1253                         }
1254                         _ASSERT(packed==CSeq_id_Textseq_Info::Pack(key, acc));
1255                         id_list.insert(CSeq_id_Handle(it->second, packed));
1256                     }
1257                 }
1258             }
1259         }
1260     }
1261 
1262     for ( TStringMapCI vit = m_ByAcc.find(acc);
1263           vit != m_ByAcc.end() && NStr::EqualNocase(vit->first, acc);
1264           ++vit ) {
1265         if ( ver ) {
1266             CConstRef<CSeq_id> tst_id = vit->second->GetSeqId();
1267             const CTextseq_id& tst = x_Get(*tst_id);
1268             // acc.ver should match
1269             if ( !tst.IsSetVersion() || tst.GetVersion() != *ver ) {
1270                 continue;
1271             }
1272         }
1273         id_list.insert(CSeq_id_Handle(vit->second));
1274     }
1275 }
1276 
1277 
1278 void
x_FindRevMatchByAccPacked(TSeq_id_MatchList & id_list,const string & acc,const TVersion * ver) const1279 CSeq_id_Textseq_Tree::x_FindRevMatchByAccPacked(TSeq_id_MatchList& id_list,
1280                                                 const string& acc,
1281                                                 const TVersion* ver) const
1282 {
1283     if ( !m_PackedMap.empty() ) {
1284         if ( TPackedKey key = CSeq_id_Textseq_Info::ParseAcc(acc, ver) ) {
1285             TPackedMap_CI it = m_PackedMap.find(key);
1286             if ( it != m_PackedMap.end() ) {
1287                 TPacked packed = CSeq_id_Textseq_Info::Pack(key, acc);
1288                 id_list.insert(CSeq_id_Handle(it->second, packed));
1289             }
1290             if ( key.IsSetVersion() ) {
1291                 // no version too
1292                 key.ResetVersion();
1293                 TPackedMap_CI itm = m_PackedMap.find(key);
1294                 if ( itm != m_PackedMap.end() ) {
1295                     TPacked packed = CSeq_id_Textseq_Info::Pack(key, acc);
1296                     id_list.insert(CSeq_id_Handle(itm->second, packed));
1297                 }
1298             }
1299         }
1300     }
1301 }
1302 
1303 
1304 void
x_FindRevMatchByAccNonPacked(TSeq_id_MatchList & id_list,const string & acc,const TVersion * ver) const1305 CSeq_id_Textseq_Tree::x_FindRevMatchByAccNonPacked(TSeq_id_MatchList& id_list,
1306                                                    const string& acc,
1307                                                    const TVersion* ver) const
1308 {
1309     for ( TStringMapCI vit = m_ByAcc.find(acc);
1310           vit != m_ByAcc.end() && NStr::EqualNocase(vit->first, acc);
1311           ++vit ) {
1312         CConstRef<CSeq_id> tst_id = vit->second->GetSeqId();
1313         const CTextseq_id& tst = x_Get(*tst_id);
1314         if ( tst.IsSetVersion() &&
1315              (!ver || tst.GetVersion() != *ver) ) {
1316             continue;
1317         }
1318         id_list.insert(CSeq_id_Handle(vit->second));
1319     }
1320 }
1321 
1322 
1323 inline
1324 void
x_FindRevMatchByAcc(TSeq_id_MatchList & id_list,const string & acc,const TVersion * ver) const1325 CSeq_id_Textseq_Tree::x_FindRevMatchByAcc(TSeq_id_MatchList& id_list,
1326                                           const string& acc,
1327                                           const TVersion* ver) const
1328 {
1329     x_FindRevMatchByAccPacked(id_list, acc, ver);
1330     x_FindRevMatchByAccNonPacked(id_list, acc, ver);
1331 }
1332 
1333 
x_FindMatchByName(TSeq_id_MatchList & id_list,const string & name,const CTextseq_id * tid) const1334 void CSeq_id_Textseq_Tree::x_FindMatchByName(TSeq_id_MatchList& id_list,
1335                                              const string& name,
1336                                              const CTextseq_id* tid) const
1337 {
1338     for ( TStringMapCI vit = m_ByName.find(name);
1339           vit != m_ByName.end() && NStr::EqualNocase(vit->first, name);
1340           ++vit ) {
1341         if ( tid ) {
1342             CConstRef<CSeq_id> tst_id = vit->second->GetSeqId();
1343             const CTextseq_id& tst = x_Get(*tst_id);
1344             // name.rel should match
1345             if ( tst.IsSetAccession() && tid->IsSetAccession() ) {
1346                 // both accessions are set.
1347                 // if they are the same - match will be found by accession,
1348                 // otherwise accessions are different and there is no match.
1349                 continue;
1350             }
1351             if ( tid->IsSetRelease() ) {
1352                 if ( tst.IsSetRelease()  ||
1353                      !(m_Type == CSeq_id::e_Swissprot &&
1354                        x_IsDefaultSwissprotRelease(tid->GetRelease())) ) {
1355                     if ( !tst.IsSetRelease() ||
1356                          tst.GetRelease() != tid->GetRelease() ) {
1357                         continue;
1358                     }
1359                 }
1360             }
1361         }
1362         id_list.insert(CSeq_id_Handle(vit->second));
1363     }
1364 }
1365 
1366 
// Reverse match by name: currently a no-op.
// All parameters are unused and nothing is added to the match list.
// The disabled implementation is kept below for reference; it is the same
// name/release filter as in x_FindMatchByName().
void CSeq_id_Textseq_Tree::x_FindRevMatchByName(TSeq_id_MatchList& /*id_list*/,
                                                const string&      /*name*/,
                                                const CTextseq_id* /*tid*/) const
{
    /*
    for ( TStringMapCI vit = m_ByName.find(name);
          vit != m_ByName.end() && NStr::EqualNocase(vit->first, name);
          ++vit ) {
        if ( tid ) {
            CConstRef<CSeq_id> tst_id = vit->second->GetSeqId();
            const CTextseq_id& tst = x_Get(*tst_id);
            // name.rel should match
            if ( tst.IsSetAccession() && tid->IsSetAccession() ) {
                // both accessions are set.
                // if they are the same - match will be found by accession,
                // otherwise accessions are different and there is no match.
                continue;
            }
            if ( tid->IsSetRelease() ) {
                if ( tst.IsSetRelease()  ||
                     !(m_Type == CSeq_id::e_Swissprot &&
                       x_IsDefaultSwissprotRelease(tid->GetRelease())) ) {
                    if ( !tst.IsSetRelease() ||
                         tst.GetRelease() != tid->GetRelease() ) {
                        continue;
                    }
                }
            }
        }
        id_list.insert(CSeq_id_Handle(vit->second));
    }
    */
}
1400 
1401 
HaveMatch(const CSeq_id_Handle &) const1402 bool CSeq_id_Textseq_Tree::HaveMatch(const CSeq_id_Handle& ) const
1403 {
1404     return true;
1405 }
1406 
1407 
FindMatch(const CSeq_id_Handle & id,TSeq_id_MatchList & id_list) const1408 void CSeq_id_Textseq_Tree::FindMatch(const CSeq_id_Handle& id,
1409                                      TSeq_id_MatchList& id_list) const
1410 {
1411     bool mine = x_Check(id.Which());
1412     if ( mine ) {
1413         id_list.insert(id);
1414     }
1415     TReadLockGuard guard(m_TreeLock);
1416     if ( id.IsPacked() ) {
1417         const CSeq_id_Textseq_Info* info =
1418             static_cast<const CSeq_id_Textseq_Info*>(GetInfo(id));
1419         if ( !m_ByAcc.empty() ) {
1420             // potentially whole search
1421             TStringMapCI it = m_ByAcc.lower_bound(info->GetAccPrefix());
1422             if ( it != m_ByAcc.end() && info->GoodPrefix(it->first) ) {
1423                 // have similar accessions
1424                 CTextseq_id tid;
1425                 info->Restore(tid, id.GetPacked(), id.GetVariant());
1426                 x_FindMatchByAcc(id_list, tid.GetAccession(), &tid);
1427                 // x_FindMatchByAcc will search packed accessions too
1428                 return;
1429             }
1430         }
1431         // only packed search -> no need to decode
1432         if ( !mine ) { // weak matching
1433             TPackedMap_CI iter = m_PackedMap.find(info->GetKey());
1434             if ( iter != m_PackedMap.end() ) {
1435                 id_list.insert(CSeq_id_Handle(iter->second, id.GetPacked(), id.GetVariant()));
1436             }
1437         }
1438         if ( !info->IsSetVersion() ) {
1439             // add all known versions
1440             const TPackedKey& key = info->GetKey();
1441             for ( TPackedMap_CI it = m_PackedMap.lower_bound(key);
1442                   it != m_PackedMap.end() && it->first.SameHashNoVer(key);
1443                   ++it ) {
1444                 if ( it->first.EqualAcc(key) ) {
1445                     id_list.insert(CSeq_id_Handle(it->second, id.GetPacked(), id.GetVariant()));
1446                 }
1447             }
1448         }
1449     }
1450     else {
1451         CConstRef<CSeq_id> tid_id = id.GetSeqId();
1452         const CTextseq_id* tid = tid_id->GetTextseq_Id();
1453         _ASSERT(tid);
1454         if ( tid->IsSetAccession() ) {
1455             x_FindMatchByAcc(id_list, tid->GetAccession(), tid);
1456         }
1457         if ( tid->IsSetName() ) {
1458             x_FindMatchByName(id_list, tid->GetName(), tid);
1459         }
1460     }
1461 }
1462 
1463 
FindMatchStr(const string & sid,TSeq_id_MatchList & id_list) const1464 void CSeq_id_Textseq_Tree::FindMatchStr(const string& sid,
1465                                         TSeq_id_MatchList& id_list) const
1466 {
1467     TReadLockGuard guard(m_TreeLock);
1468     // ignore '.' in the search string - cut it out
1469     SIZE_TYPE dot = sid.find('.');
1470     if ( dot != NPOS ) {
1471         string acc = sid.substr(0, dot);
1472         x_FindMatchByAcc(id_list, acc);
1473         x_FindMatchByName(id_list, acc);
1474     }
1475     else {
1476         x_FindMatchByAcc(id_list, sid);
1477         x_FindMatchByName(id_list, sid);
1478     }
1479 }
1480 
1481 
Match(const CSeq_id_Handle & h1,const CSeq_id_Handle & h2) const1482 bool CSeq_id_Textseq_Tree::Match(const CSeq_id_Handle& h1,
1483                                  const CSeq_id_Handle& h2) const
1484 {
1485     return CSeq_id_Which_Tree::Match(h1, h2);
1486 }
1487 
1488 
1489 inline
x_GetVersion(int & version,const CSeq_id_Handle & id) const1490 bool CSeq_id_Textseq_Tree::x_GetVersion(int& version,
1491                                         const CSeq_id_Handle& id) const
1492 {
1493     if ( id.IsPacked() ) {
1494         const CSeq_id_Textseq_Info* info =
1495             static_cast<const CSeq_id_Textseq_Info*>(GetInfo(id));
1496         if ( !info->IsSetVersion() ) {
1497             version = 0;
1498             return false;
1499         }
1500         version = info->GetVersion();
1501         return true;
1502     }
1503     else {
1504         CConstRef<CSeq_id> id1 = id.GetSeqId();
1505         const CTextseq_id* tid1 = id1->GetTextseq_Id();
1506         if ( !tid1->IsSetVersion() ) {
1507             version = 0;
1508             return false;
1509         }
1510         version = tid1->GetVersion();
1511         return true;
1512     }
1513 }
1514 
1515 
IsBetterVersion(const CSeq_id_Handle & h1,const CSeq_id_Handle & h2) const1516 bool CSeq_id_Textseq_Tree::IsBetterVersion(const CSeq_id_Handle& h1,
1517                                            const CSeq_id_Handle& h2) const
1518 {
1519     // Compare versions. If only one of the two ids has version,
1520     // consider it as better.
1521     int version1, version2;
1522     return x_GetVersion(version1, h1) &&
1523         (!x_GetVersion(version2, h2) || version1 > version2);
1524 }
1525 
1526 
HaveReverseMatch(const CSeq_id_Handle &) const1527 bool CSeq_id_Textseq_Tree::HaveReverseMatch(const CSeq_id_Handle&) const
1528 {
1529     return true;
1530 }
1531 
1532 
FindReverseMatch(const CSeq_id_Handle & id,TSeq_id_MatchList & id_list)1533 void CSeq_id_Textseq_Tree::FindReverseMatch(const CSeq_id_Handle& id,
1534                                             TSeq_id_MatchList& id_list)
1535 {
1536     bool mine = x_Check(id.Which());
1537     if ( mine ) {
1538         id_list.insert(id);
1539     }
1540     if ( id.IsPacked() ) {
1541         TReadLockGuard guard(m_TreeLock);
1542         const CSeq_id_Textseq_Info* info =
1543             static_cast<const CSeq_id_Textseq_Info*>(GetInfo(id));
1544         if ( !mine ) { // weak matching
1545             TPackedMap_CI iter = m_PackedMap.find(info->GetKey());
1546             if ( iter != m_PackedMap.end() ) {
1547                 id_list.insert(CSeq_id_Handle(iter->second, id.GetPacked(), id.GetVariant()));
1548             }
1549         }
1550         if ( info->IsSetVersion() ) {
1551             TPackedKey key = info->GetKey();
1552             key.ResetVersion();
1553             TPackedMap_CI it = m_PackedMap.find(key);
1554             if ( it != m_PackedMap.end() ) {
1555                 id_list.insert(CSeq_id_Handle(it->second, id.GetPacked(), id.GetVariant()));
1556             }
1557         }
1558         if ( !m_ByAcc.empty() ) {
1559             // look for non-packed variants that may have set name or revision
1560             string acc;
1561             info->RestoreAccession(acc, id.GetPacked(), id.GetVariant());
1562             x_FindRevMatchByAccNonPacked
1563                 (id_list, acc, info->IsSetVersion()? &info->GetVersion(): 0);
1564         }
1565         return;
1566     }
1567 
1568     CConstRef<CSeq_id> orig_id = id.GetSeqId();
1569     const CTextseq_id& orig_tid = x_Get(*orig_id);
1570 
1571     if ( true || !mine ) { // this code should be enough
1572         TReadLockGuard guard(m_TreeLock);
1573         // search only existing accessions
1574         if ( orig_tid.IsSetAccession() ) {
1575             x_FindRevMatchByAcc(id_list, orig_tid.GetAccession(), &orig_tid);
1576         }
1577         if ( orig_tid.IsSetName() ) {
1578             x_FindRevMatchByName(id_list, orig_tid.GetName(), &orig_tid);
1579         }
1580         return;
1581     }
1582 }
1583 
1584 
Dump(CNcbiOstream & out,CSeq_id::E_Choice type,int details) const1585 size_t CSeq_id_Textseq_Tree::Dump(CNcbiOstream& out,
1586                                   CSeq_id::E_Choice type,
1587                                   int details) const
1588 {
1589     size_t total_bytes = 0;
1590     if ( details >= CSeq_id_Mapper::eDumpStatistics ) {
1591         out << "CSeq_id_Handles("<<CSeq_id::SelectionName(type)<<"): "<<endl;
1592     }
1593     {{
1594         size_t size = m_ByAcc.size() + m_ByName.size();
1595         size_t elem_size = 0, extra_size = 0;
1596         if ( size ) {
1597             elem_size = sizeof(string)+sizeof(void*); // map value
1598             elem_size += sizeof(int)+3*sizeof(void*); // red/black tree
1599             elem_size += sizeof(CSeq_id_Info); //
1600             elem_size += sizeof(CSeq_id); //
1601             elem_size += sizeof(CTextseq_id); //
1602             // malloc overhead:
1603             // map value, CSeq_id_Info, CSeq_id, CTextseq_id
1604             elem_size += 4*kMallocOverhead;
1605             ITERATE ( TStringMap, it, m_ByAcc ) {
1606                 CConstRef<CSeq_id> id_id = it->second->GetSeqId();
1607                 const CTextseq_id& id = *id_id->GetTextseq_Id();
1608                 extra_size += sx_StringMemory(id.GetAccession());
1609                 if ( id.IsSetName() ) {
1610                     extra_size += sx_StringMemory(id.GetName());
1611                 }
1612                 if ( id.IsSetRelease() ) {
1613                     extra_size += sx_StringMemory(id.GetRelease());
1614                 }
1615             }
1616         }
1617         size_t bytes = extra_size + size*elem_size;
1618         total_bytes += bytes;
1619         if ( details >= CSeq_id_Mapper::eDumpStatistics ) {
1620             out << " "<<size << " handles, "<<bytes<<" bytes"<<endl;
1621         }
1622     }}
1623     {{
1624         size_t size = m_PackedMap.size(), elem_size = 0, extra_size = 0;
1625         if ( size ) {
1626             elem_size = sizeof(TPackedKey)+sizeof(void*);
1627             elem_size += sizeof(int)+3*sizeof(void*); // red/black tree
1628             elem_size += sizeof(CSeq_id_Textseq_Info); //
1629             // malloc overhead:
1630             // map value, CSeq_id_Textseq_Info
1631             elem_size += 2*kMallocOverhead;
1632             ITERATE ( TPackedMap, it, m_PackedMap ) {
1633                 //extra_size += sx_StringMemory(it->first.m_Prefix);
1634             }
1635         }
1636         size_t bytes = extra_size + size*elem_size;
1637         total_bytes += bytes;
1638         if ( details >= CSeq_id_Mapper::eDumpStatistics ) {
1639             out << " " <<size << " packed handles, "<<bytes<<" bytes"<<endl;
1640         }
1641     }}
1642     if ( details >= CSeq_id_Mapper::eDumpAllIds ) {
1643         ITERATE ( TStringMap, it, m_ByAcc ) {
1644             CConstRef<CSeq_id> id = it->second->GetSeqId();
1645             out << "  " << id->AsFastaString() << endl;
1646         }
1647         ITERATE ( TPackedMap, it, m_PackedMap ) {
1648             out << "  packed prefix "
1649                 << it->first.GetAccPrefix()<<"."<<it->first.m_Version << endl;
1650         }
1651     }
1652     return total_bytes;
1653 }
1654 
1655 /////////////////////////////////////////////////////////////////////////////
1656 // CSeq_id_GB_Tree
1657 /////////////////////////////////////////////////////////////////////////////
1658 
CSeq_id_GB_Tree(CSeq_id_Mapper * mapper)1659 CSeq_id_GB_Tree::CSeq_id_GB_Tree(CSeq_id_Mapper* mapper)
1660     : CSeq_id_Textseq_Tree(mapper, CSeq_id::e_not_set)
1661 {
1662 }
1663 
1664 
x_Check(const CSeq_id::E_Choice & type) const1665 bool CSeq_id_GB_Tree::x_Check(const CSeq_id::E_Choice& type) const
1666 {
1667     return
1668         type == CSeq_id::e_Genbank  ||
1669         type == CSeq_id::e_Embl  ||
1670         type == CSeq_id::e_Ddbj;
1671 }
1672 
1673 
1674 /////////////////////////////////////////////////////////////////////////////
1675 // CSeq_id_Pir_Tree
1676 /////////////////////////////////////////////////////////////////////////////
1677 
CSeq_id_Pir_Tree(CSeq_id_Mapper * mapper)1678 CSeq_id_Pir_Tree::CSeq_id_Pir_Tree(CSeq_id_Mapper* mapper)
1679     : CSeq_id_Textseq_Tree(mapper, CSeq_id::e_Pir)
1680 {
1681 }
1682 
1683 
1684 /////////////////////////////////////////////////////////////////////////////
1685 // CSeq_id_Swissprot_Tree
1686 /////////////////////////////////////////////////////////////////////////////
1687 
CSeq_id_Swissprot_Tree(CSeq_id_Mapper * mapper)1688 CSeq_id_Swissprot_Tree::CSeq_id_Swissprot_Tree(CSeq_id_Mapper* mapper)
1689     : CSeq_id_Textseq_Tree(mapper, CSeq_id::e_Swissprot)
1690 {
1691 }
1692 
1693 
1694 /////////////////////////////////////////////////////////////////////////////
1695 // CSeq_id_Prf_Tree
1696 /////////////////////////////////////////////////////////////////////////////
1697 
CSeq_id_Prf_Tree(CSeq_id_Mapper * mapper)1698 CSeq_id_Prf_Tree::CSeq_id_Prf_Tree(CSeq_id_Mapper* mapper)
1699     : CSeq_id_Textseq_Tree(mapper, CSeq_id::e_Prf)
1700 {
1701 }
1702 
1703 
1704 /////////////////////////////////////////////////////////////////////////////
1705 // CSeq_id_Tpg_Tree
1706 /////////////////////////////////////////////////////////////////////////////
1707 
CSeq_id_Tpg_Tree(CSeq_id_Mapper * mapper)1708 CSeq_id_Tpg_Tree::CSeq_id_Tpg_Tree(CSeq_id_Mapper* mapper)
1709     : CSeq_id_Textseq_Tree(mapper, CSeq_id::e_Tpg)
1710 {
1711 }
1712 
1713 
1714 /////////////////////////////////////////////////////////////////////////////
1715 // CSeq_id_Tpe_Tree
1716 /////////////////////////////////////////////////////////////////////////////
1717 
CSeq_id_Tpe_Tree(CSeq_id_Mapper * mapper)1718 CSeq_id_Tpe_Tree::CSeq_id_Tpe_Tree(CSeq_id_Mapper* mapper)
1719     : CSeq_id_Textseq_Tree(mapper, CSeq_id::e_Tpe)
1720 {
1721 }
1722 
1723 
1724 /////////////////////////////////////////////////////////////////////////////
1725 // CSeq_id_Tpd_Tree
1726 /////////////////////////////////////////////////////////////////////////////
1727 
CSeq_id_Tpd_Tree(CSeq_id_Mapper * mapper)1728 CSeq_id_Tpd_Tree::CSeq_id_Tpd_Tree(CSeq_id_Mapper* mapper)
1729     : CSeq_id_Textseq_Tree(mapper, CSeq_id::e_Tpd)
1730 {
1731 }
1732 
1733 
1734 /////////////////////////////////////////////////////////////////////////////
1735 // CSeq_id_Gpipe_Tree
1736 /////////////////////////////////////////////////////////////////////////////
1737 
CSeq_id_Gpipe_Tree(CSeq_id_Mapper * mapper)1738 CSeq_id_Gpipe_Tree::CSeq_id_Gpipe_Tree(CSeq_id_Mapper* mapper)
1739     : CSeq_id_Textseq_Tree(mapper, CSeq_id::e_Gpipe)
1740 {
1741 }
1742 
1743 
1744 /////////////////////////////////////////////////////////////////////////////
1745 // CSeq_id_Named_annot_track_Tree
1746 /////////////////////////////////////////////////////////////////////////////
1747 
CSeq_id_Named_annot_track_Tree(CSeq_id_Mapper * mapper)1748 CSeq_id_Named_annot_track_Tree::CSeq_id_Named_annot_track_Tree(CSeq_id_Mapper* mapper)
1749     : CSeq_id_Textseq_Tree(mapper, CSeq_id::e_Named_annot_track)
1750 {
1751 }
1752 
1753 
1754 /////////////////////////////////////////////////////////////////////////////
1755 // CSeq_id_Other_Tree
1756 /////////////////////////////////////////////////////////////////////////////
1757 
CSeq_id_Other_Tree(CSeq_id_Mapper * mapper)1758 CSeq_id_Other_Tree::CSeq_id_Other_Tree(CSeq_id_Mapper* mapper)
1759     : CSeq_id_Textseq_Tree(mapper, CSeq_id::e_Other)
1760 {
1761 }
1762 
1763 
1764 /////////////////////////////////////////////////////////////////////////////
1765 // CSeq_id_Local_Tree
1766 /////////////////////////////////////////////////////////////////////////////
1767 
1768 
CSeq_id_Local_Tree(CSeq_id_Mapper * mapper)1769 CSeq_id_Local_Tree::CSeq_id_Local_Tree(CSeq_id_Mapper* mapper)
1770     : CSeq_id_Which_Tree(mapper)
1771 {
1772 }
1773 
1774 
~CSeq_id_Local_Tree(void)1775 CSeq_id_Local_Tree::~CSeq_id_Local_Tree(void)
1776 {
1777 }
1778 
1779 
Empty(void) const1780 bool CSeq_id_Local_Tree::Empty(void) const
1781 {
1782     return m_ByStr.empty() && m_ById.empty();
1783 }
1784 
1785 
sx_AllDigits(const string & s)1786 static inline bool sx_AllDigits(const string& s)
1787 {
1788     ITERATE ( string, i, s ) {
1789         if ( !isdigit(Uint1(*i)) ) {
1790             return false;
1791         }
1792     }
1793     return true;
1794 }
1795 
1796 
sx_ParseLocalStrId(const string & str,CObject_id::TId & id)1797 static bool sx_ParseLocalStrId(const string& str, CObject_id::TId& id)
1798 {
1799     CObject_id::TId value = NStr::StringToNumeric<CObject_id::TId>(str, NStr::fConvErr_NoThrow);
1800     if ( !value ) {
1801         if ( errno ) {
1802             // not convertible to integer
1803             return false;
1804         }
1805         // converted to 0
1806         if ( str.size() != 1 ) {
1807             // leading zeroes are not allowed
1808             return false;
1809         }
1810         // valid zero as a string
1811         id = 0;
1812         return true;
1813     }
1814     else if ( value > 0 ) {
1815         // non-zero positive value
1816         if ( str[0] == '0' || str[0] == '+' ) {
1817             // redundant '+' or leading zeroes are not allowed
1818             return false;
1819         }
1820         // valid positive as a string
1821         id = value;
1822         return true;
1823     }
1824     else {
1825         // non-zero negative value
1826         if ( str[0] != '-' || str[1] == '0' ) {
1827             // leading zeroes are not allowed
1828             return false;
1829         }
1830         // valid negative as a string
1831         id = value;
1832         return true;
1833     }
1834 }
1835 
1836 
CSeq_id_Local_Info(const CObject_id & oid,CSeq_id_Mapper * mapper)1837 CSeq_id_Local_Info::CSeq_id_Local_Info(const CObject_id& oid, CSeq_id_Mapper* mapper)
1838     : CSeq_id_Info(CSeq_id::e_Local, mapper),
1839       m_IsId(oid.IsId())
1840 {
1841     CRef<CSeq_id> seq_id(new CSeq_id);
1842     CObject_id& oid2 = seq_id->SetLocal();
1843     if ( IsId() ) {
1844         m_HasMatchingId = true;
1845         m_MatchingId = oid.GetId();
1846         oid2.SetId(oid.GetId());
1847     }
1848     else {
1849         m_HasMatchingId = sx_ParseLocalStrId(oid.GetStr(), m_MatchingId);
1850         oid2.SetStr(oid.GetStr());
1851     }
1852     m_Seq_id = move(seq_id);
1853 }
1854 
1855 
~CSeq_id_Local_Info()1856 CSeq_id_Local_Info::~CSeq_id_Local_Info()
1857 {
1858 }
1859 
1860 
1861 inline CSeq_id_Handle::TVariant
ParseCaseVariant(const string & str) const1862 CSeq_id_Local_Info::ParseCaseVariant(const string& str) const
1863 {
1864     return s_ParseCaseVariant(m_Seq_id->GetLocal().GetStr(), str).first;
1865 }
1866 
1867 
1868 inline CSeq_id_Handle::TVariant
ParseCaseVariant(const CObject_id & oid) const1869 CSeq_id_Local_Info::ParseCaseVariant(const CObject_id& oid) const
1870 {
1871     if ( !oid.IsStr() ) {
1872         return 0;
1873     }
1874     return ParseCaseVariant(oid.GetStr());
1875 }
1876 
1877 
GetPackedSeqId(TPacked packed,TVariant variant) const1878 CConstRef<CSeq_id> CSeq_id_Local_Info::GetPackedSeqId(TPacked packed, TVariant variant) const
1879 {
1880     if ( !variant ) {
1881         return m_Seq_id;
1882     }
1883     CRef<CSeq_id> ret(new CSeq_id);
1884     const CObject_id& src = m_Seq_id->GetLocal();
1885     CObject_id& oid = ret->SetLocal();
1886     if ( IsId() ) {
1887         oid.SetId(src.GetId());
1888     }
1889     else {
1890         string& str = oid.SetStr();
1891         str = src.GetStr();
1892         s_RestoreCaseVariant(str, variant);
1893     }
1894     return ret;
1895 }
1896 
1897 
x_FindStrInfo(const string & str) const1898 CSeq_id_Local_Info* CSeq_id_Local_Tree::x_FindStrInfo(const string& str) const
1899 {
1900     TByStr::const_iterator it = m_ByStr.find(str);
1901     if ( it != m_ByStr.end() ) {
1902         return it->second;
1903     }
1904     // Not found
1905     return 0;
1906 }
1907 
1908 
x_FindIdInfo(CObject_id::TId id) const1909 CSeq_id_Local_Info* CSeq_id_Local_Tree::x_FindIdInfo(CObject_id::TId id) const
1910 {
1911     TById::const_iterator it = m_ById.find(id);
1912     if ( it != m_ById.end() ) {
1913         return it->second;
1914     }
1915     // Not found
1916     return 0;
1917 }
1918 
1919 
x_FindInfo(const CObject_id & oid) const1920 CSeq_id_Local_Info* CSeq_id_Local_Tree::x_FindInfo(const CObject_id& oid) const
1921 {
1922     if ( oid.IsStr() ) {
1923         return x_FindStrInfo(oid.GetStr());
1924     }
1925     else {
1926         return x_FindIdInfo(oid.GetId());
1927     }
1928 }
1929 
1930 
FindInfo(const CSeq_id & id) const1931 CSeq_id_Handle CSeq_id_Local_Tree::FindInfo(const CSeq_id& id) const
1932 {
1933     _ASSERT( id.IsLocal() );
1934     const CObject_id& oid = id.GetLocal();
1935     TReadLockGuard guard(m_TreeLock);
1936     CSeq_id_Local_Info* info = x_FindInfo(oid);
1937     CSeq_id_Handle::TVariant variant = info? info->ParseCaseVariant(oid): 0;
1938     return CSeq_id_Handle(info, 0, variant);
1939 }
1940 
1941 
FindOrCreate(const CSeq_id & id)1942 CSeq_id_Handle CSeq_id_Local_Tree::FindOrCreate(const CSeq_id& id)
1943 {
1944     const CObject_id& oid = id.GetLocal();
1945     TWriteLockGuard guard(m_TreeLock);
1946     CSeq_id_Local_Info*& info = oid.IsStr()? m_ByStr[oid.GetStr()]: m_ById[oid.GetId()];
1947     CSeq_id_Handle::TVariant variant = 0;
1948     if ( !info ) {
1949         info = new CSeq_id_Local_Info(oid, m_Mapper);
1950     }
1951     else {
1952         variant = info->ParseCaseVariant(oid);
1953     }
1954     return CSeq_id_Handle(info, 0, variant);
1955 }
1956 
1957 
x_Unindex(const CSeq_id_Info * info)1958 void CSeq_id_Local_Tree::x_Unindex(const CSeq_id_Info* info)
1959 {
1960     CConstRef<CSeq_id> id = info->GetSeqId();
1961     _ASSERT(id->IsLocal());
1962     const CObject_id& oid = id->GetLocal();
1963 
1964     if ( oid.IsStr() ) {
1965         _VERIFY(m_ByStr.erase(oid.GetStr()));
1966     }
1967     else if ( oid.IsId() ) {
1968         _VERIFY(m_ById.erase(oid.GetId()));
1969     }
1970 }
1971 
1972 
HaveMatch(const CSeq_id_Handle & id) const1973 bool CSeq_id_Local_Tree::HaveMatch(const CSeq_id_Handle& id) const
1974 {
1975     // match id <-> str(number)
1976     const CSeq_id_Local_Info* sinfo =
1977         static_cast<const CSeq_id_Local_Info*>(id.x_GetInfo());
1978     return sinfo->IsId() || sinfo->HasMatchingId();
1979 }
1980 
1981 
FindMatch(const CSeq_id_Handle & id,TSeq_id_MatchList & id_list) const1982 void CSeq_id_Local_Tree::FindMatch(const CSeq_id_Handle& id,
1983                                    TSeq_id_MatchList& id_list) const
1984 {
1985     id_list.insert(id);
1986     // match id <-> str(number)
1987     const CSeq_id_Local_Info* sinfo =
1988         static_cast<const CSeq_id_Local_Info*>(id.x_GetInfo());
1989     TReadLockGuard guard(m_TreeLock);
1990     if ( sinfo->IsId() ) {
1991         // id -> str
1992         if ( CSeq_id_Info* id2 = x_FindStrInfo(NStr::NumericToString(sinfo->GetMatchingId())) ) {
1993             id_list.insert(CSeq_id_Handle(id2));
1994         }
1995     }
1996     else if ( sinfo->HasMatchingId() ) {
1997         // str -> id
1998         if ( CSeq_id_Info* id2 = x_FindIdInfo(sinfo->GetMatchingId()) ) {
1999             id_list.insert(CSeq_id_Handle(id2));
2000         }
2001     }
2002 }
2003 
2004 
FindMatchStr(const string & str,TSeq_id_MatchList & id_list) const2005 void CSeq_id_Local_Tree::FindMatchStr(const string& str,
2006                                       TSeq_id_MatchList& id_list) const
2007 {
2008     CObject_id::TId id;
2009     bool has_matching_id = sx_ParseLocalStrId(str, id);
2010     TReadLockGuard guard(m_TreeLock);
2011     // In any case search in strings
2012     if ( CSeq_id_Info* id2 = x_FindStrInfo(str) ) {
2013         id_list.insert(CSeq_id_Handle(id2));
2014     }
2015     // search possible int match
2016     if ( has_matching_id ) {
2017         if ( CSeq_id_Info* id2 = x_FindIdInfo(id) ) {
2018             id_list.insert(CSeq_id_Handle(id2));
2019         }
2020     }
2021 }
2022 
2023 
Dump(CNcbiOstream & out,CSeq_id::E_Choice type,int details) const2024 size_t CSeq_id_Local_Tree::Dump(CNcbiOstream& out,
2025                                 CSeq_id::E_Choice type,
2026                                 int details) const
2027 {
2028     size_t total_bytes = 0;
2029     if ( details >= CSeq_id_Mapper::eDumpStatistics ) {
2030         out << "CSeq_id_Handles("<<CSeq_id::SelectionName(type)<<"): "<<endl;
2031     }
2032     {{
2033         size_t size = m_ByStr.size(), elem_size = 0, extra_size = 0;
2034         if ( size ) {
2035             elem_size = sizeof(string)+sizeof(void*); // map value
2036             elem_size += sizeof(int)+3*sizeof(void*); // red/black tree
2037             elem_size += sizeof(CSeq_id_Info); //
2038             elem_size += sizeof(CSeq_id); //
2039             elem_size += sizeof(CObject_id); //
2040             // malloc overhead:
2041             // map value, CSeq_id_Info, CSeq_id, CObject_id
2042             elem_size += 4*kMallocOverhead;
2043             ITERATE ( TByStr, it, m_ByStr ) {
2044                 extra_size += sx_StringMemory(it->first);
2045             }
2046         }
2047         size_t bytes = extra_size + size*elem_size;
2048         total_bytes += bytes;
2049         if ( details >= CSeq_id_Mapper::eDumpStatistics ) {
2050             out << " " <<size << " str handles, "<<bytes<<" bytes" << endl;
2051         }
2052     }}
2053     {{
2054         size_t size = m_ById.size(), elem_size = 0;
2055         if ( size ) {
2056             elem_size = sizeof(int)+sizeof(void*);
2057             elem_size += sizeof(int)+3*sizeof(void*); // red/black tree
2058             elem_size += sizeof(CSeq_id_Info); //
2059             elem_size += sizeof(CSeq_id); //
2060             elem_size += sizeof(CObject_id); //
2061             // malloc overhead:
2062             // map value, CSeq_id_Info, CSeq_id, CObject_id
2063             elem_size += 4*kMallocOverhead;
2064         }
2065         size_t bytes = size*elem_size;
2066         total_bytes += bytes;
2067         if ( details >= CSeq_id_Mapper::eDumpStatistics ) {
2068             out << " "<<size << " int handles, "<<bytes<<" bytes" << endl;
2069         }
2070     }}
2071     if ( details >= CSeq_id_Mapper::eDumpAllIds ) {
2072         ITERATE ( TByStr, it, m_ByStr ) {
2073             out << "  " << it->second->GetSeqId()->AsFastaString() << endl;
2074         }
2075         ITERATE ( TById, it, m_ById ) {
2076             out << "  " << it->second->GetSeqId()->AsFastaString() << endl;
2077         }
2078     }
2079     return total_bytes;
2080 }
2081 
2082 /////////////////////////////////////////////////////////////////////////////
2083 // CSeq_id_General_Id_Info
2084 /////////////////////////////////////////////////////////////////////////////
2085 
2086 
CSeq_id_General_Id_Info(CSeq_id_Mapper * mapper,const TKey & key)2087 CSeq_id_General_Id_Info::CSeq_id_General_Id_Info(CSeq_id_Mapper* mapper,
2088                                                  const TKey& key)
2089     : CSeq_id_Info(CSeq_id::e_General, mapper),
2090       m_Key(key)
2091 {
2092 }
2093 
2094 
~CSeq_id_General_Id_Info(void)2095 CSeq_id_General_Id_Info::~CSeq_id_General_Id_Info(void)
2096 {
2097 }
2098 
2099 
2100 inline
2101 CSeq_id_General_Id_Info::TPacked
Pack(const TKey &,const CDbtag & dbtag)2102 CSeq_id_General_Id_Info::Pack(const TKey& /*key*/, const CDbtag& dbtag)
2103 {
2104     TPacked id = dbtag.GetTag().GetId();
2105     if ( id <= 0 ) {
2106         --id;
2107     }
2108     return id;
2109 }
2110 
2111 
Restore(CDbtag & dbtag,TPacked param,TVariant variant) const2112 void CSeq_id_General_Id_Info::Restore(CDbtag& dbtag, TPacked param, TVariant variant) const
2113 {
2114     if ( !dbtag.IsSetDb() ) {
2115         dbtag.SetDb(GetDbtag());
2116     }
2117     if ( param < 0 ) {
2118         ++param;
2119     }
2120     dbtag.SetTag().SetId(CObject_id::TId(param));
2121     s_RestoreCaseVariant(dbtag.SetDb(), variant);
2122 }
2123 
2124 
// Produce the CSeq_id for a packed integer-tag general id.
//
// For a non-zero case variant a new CSeq_id is always allocated.
// For variant 0 the cached m_Seq_id is reused when possible: it is taken
// out of the member, replaced by a new CSeq_id if it was null or is
// referenced elsewhere, and then stored back into the member.
// In both cases Restore() writes the db name and tag into the result.
CConstRef<CSeq_id> CSeq_id_General_Id_Info::GetPackedSeqId(TPacked param, TVariant variant) const
{
    CConstRef<CSeq_id> ret;
    if ( variant ) {
        // all non-initial case variants need fresh Seq-id to start with
        ret = new CSeq_id;
    }
    else {
        // otherwise try to use shared Seq-id if it's not referenced anywhere else
        typedef CSeq_id_General_Id_Info TThis;
#if defined NCBI_SLOW_ATOMIC_SWAP
        // mutex-protected variant of the swap below
        CFastMutexGuard guard(sx_GetSeqIdMutex);
        ret = m_Seq_id;
        const_cast<TThis*>(this)->m_Seq_id.Reset();
        if ( !ret || !ret->ReferencedOnlyOnce() ) {
            ret.Reset(new CSeq_id);
        }
        const_cast<TThis*>(this)->m_Seq_id = ret;
#else
        // NOTE(review): AtomicReleaseTo()/AtomicResetFrom() presumably move
        // the pointer out of / back into m_Seq_id atomically - confirm
        // against the CRef/CConstRef documentation.
        const_cast<TThis*>(this)->m_Seq_id.AtomicReleaseTo(ret);
        if ( !ret || !ret->ReferencedOnlyOnce() ) {
            ret.Reset(new CSeq_id);
        }
        const_cast<TThis*>(this)->m_Seq_id.AtomicResetFrom(ret);
#endif
    }
    // the result is written through a const_cast
    Restore(const_cast<CSeq_id&>(*ret).SetGeneral(), param, variant);
    return ret;
}
2154 
2155 
2156 /////////////////////////////////////////////////////////////////////////////
2157 // CSeq_id_General_Str_Info
2158 /////////////////////////////////////////////////////////////////////////////
2159 
2160 
CSeq_id_General_Str_Info(CSeq_id_Mapper * mapper,const TKey & key)2161 CSeq_id_General_Str_Info::CSeq_id_General_Str_Info(CSeq_id_Mapper* mapper,
2162                                                  const TKey& key)
2163     : CSeq_id_Info(CSeq_id::e_General, mapper),
2164       m_Key(key)
2165 {
2166 }
2167 
2168 
~CSeq_id_General_Str_Info(void)2169 CSeq_id_General_Str_Info::~CSeq_id_General_Str_Info(void)
2170 {
2171 }
2172 
2173 
2174 inline
ParseCaseVariant(const CDbtag & dbtag) const2175 CSeq_id_Handle::TVariant CSeq_id_General_Str_Info::TKey::ParseCaseVariant(const CDbtag& dbtag) const
2176 {
2177     auto t1 = s_ParseCaseVariant(m_Db, dbtag.GetDb());
2178     const char* str = dbtag.GetTag().GetStr().data();
2179     auto t2 = s_ParseCaseVariant(m_StrPrefix, str, t1.second);
2180     auto t3 = s_ParseCaseVariant(m_StrSuffix, str+m_StrPrefix.size()+GetStrDigits(), t2.second);
2181     return t1.first | t2.first | t3.first;
2182 }
2183 
2184 
2185 CSeq_id_General_Str_Info::TKey
Parse(const CDbtag & dbtag)2186 CSeq_id_General_Str_Info::Parse(const CDbtag& dbtag)
2187 {
2188     TKey key;
2189     key.m_Key = 0;
2190     const string& str = dbtag.GetTag().GetStr();
2191     size_t len = str.size(), prefix_len = len, str_digits = 0;
2192     // find longest digit substring
2193     size_t cur_digits = 0, total_digits = 0;
2194     for ( ssize_t i = len; i >= 0; ) {
2195         char c = --i < 0? 0: str[i];
2196         if ( c >= '0' && c <= '9' ) {
2197             ++total_digits;
2198             ++cur_digits;
2199         }
2200         else {
2201             if ( !str_digits || cur_digits > str_digits+2 ) {
2202                 str_digits = cur_digits;
2203                 prefix_len = i+1;
2204             }
2205             cur_digits = 0;
2206         }
2207     }
2208     if ( str_digits > 9 ) {
2209         prefix_len += str_digits - 9;
2210         total_digits += str_digits - 9;
2211         str_digits = 9;
2212     }
2213     if ( str_digits*3 < total_digits*2 ) {
2214         // too many other digits
2215         return key;
2216     }
2217     key.m_Db = dbtag.GetDb();
2218     if ( prefix_len > 0 ) {
2219         key.m_StrPrefix = str.substr(0, prefix_len);
2220     }
2221     if ( prefix_len + str_digits < str.size() ) {
2222         key.m_StrSuffix = str.substr(prefix_len+str_digits);
2223     }
2224     TPacked hash = 1;
2225     if ( 1 ) {
2226         ITERATE(string, i, key.m_Db) {
2227             hash = hash*17 + toupper(Uint1(*i));
2228         }
2229         ITERATE ( string, i, key.m_StrPrefix ) {
2230             hash = hash*17 + toupper(Uint1(*i));
2231         }
2232         ITERATE(string, i, key.m_StrSuffix) {
2233             hash = hash*17 + toupper(Uint1(*i));
2234         }
2235     }
2236     else {
2237         for ( size_t i = 0; i < 3 && i < prefix_len; ++i ) {
2238             hash = (hash << 8) | toupper(key.m_StrPrefix[prefix_len-1-i] & 0xff);
2239         }
2240     }
2241     key.m_Key = (hash << 8) | TPacked(str_digits);
2242     return key;
2243 }
2244 
2245 
2246 inline
2247 CSeq_id_General_Str_Info::TPacked
Pack(const TKey & key,const CDbtag & dbtag)2248 CSeq_id_General_Str_Info::Pack(const TKey& key,
2249                                const CDbtag& dbtag)
2250 {
2251     TPacked id = s_ParseNumber(dbtag.GetTag().GetStr(),
2252                                key.m_StrPrefix.size(),
2253                                key.GetStrDigits());
2254     if ( id <= 0 ) {
2255         --id;
2256     }
2257     return id;
2258 }
2259 
2260 
Restore(CDbtag & dbtag,TPacked param,TVariant variant) const2261 void CSeq_id_General_Str_Info::Restore(CDbtag& dbtag, TPacked param, TVariant variant) const
2262 {
2263     if ( !dbtag.IsSetDb() ) {
2264         dbtag.SetDb(GetDbtag());
2265     }
2266     CObject_id& obj_id = dbtag.SetTag();
2267     if ( !obj_id.IsStr() ) {
2268         obj_id.SetStr(GetStrPrefix());
2269         string& str = obj_id.SetStr();
2270         str.resize(str.size() + GetStrDigits(), '0');
2271         if ( !GetStrSuffix().empty() ) {
2272             str += GetStrSuffix();
2273         }
2274     }
2275     if ( param < 0 ) {
2276         ++param;
2277     }
2278     s_RestoreNumber(obj_id.SetStr(), GetStrPrefix().size(), GetStrDigits(), param);
2279     variant = s_RestoreCaseVariant(dbtag.SetDb(), variant);
2280     s_RestoreCaseVariant(obj_id.SetStr(), variant);
2281 }
2282 
2283 
// Produce the CSeq_id for a packed string-tag general id.
//
// For a non-zero case variant a new CSeq_id is always allocated.
// For variant 0 the cached m_Seq_id is reused when possible: it is taken
// out of the member, replaced by a new CSeq_id if it was null or is
// referenced elsewhere, and then stored back into the member.
// In both cases Restore() writes the db name and tag into the result.
CConstRef<CSeq_id> CSeq_id_General_Str_Info::GetPackedSeqId(TPacked param, TVariant variant) const
{
    CConstRef<CSeq_id> ret;
    if ( variant ) {
        // all non-initial case variants need fresh Seq-id to start with
        ret = new CSeq_id;
    }
    else {
        // otherwise try to use shared Seq-id if it's not referenced anywhere else
        typedef CSeq_id_General_Str_Info TThis;
#if defined NCBI_SLOW_ATOMIC_SWAP
        // mutex-protected variant of the swap below
        CFastMutexGuard guard(sx_GetSeqIdMutex);
        ret = m_Seq_id;
        const_cast<TThis*>(this)->m_Seq_id.Reset();
        if ( !ret || !ret->ReferencedOnlyOnce() ) {
            ret.Reset(new CSeq_id);
        }
        const_cast<TThis*>(this)->m_Seq_id = ret;
#else
        // NOTE(review): AtomicReleaseTo()/AtomicResetFrom() presumably move
        // the pointer out of / back into m_Seq_id atomically - confirm
        // against the CRef/CConstRef documentation.
        const_cast<TThis*>(this)->m_Seq_id.AtomicReleaseTo(ret);
        if ( !ret || !ret->ReferencedOnlyOnce() ) {
            ret.Reset(new CSeq_id);
        }
        const_cast<TThis*>(this)->m_Seq_id.AtomicResetFrom(ret);
#endif
    }
    // the result is written through a const_cast
    Restore(const_cast<CSeq_id&>(*ret).SetGeneral(), param, variant);
    return ret;
}
2313 
2314 
2315 /////////////////////////////////////////////////////////////////////////////
2316 // CSeq_id_General_PlainInfo
2317 /////////////////////////////////////////////////////////////////////////////
2318 
2319 
CSeq_id_General_PlainInfo(const CDbtag & dbid,CSeq_id_Mapper * mapper)2320 CSeq_id_General_PlainInfo::CSeq_id_General_PlainInfo(const CDbtag& dbid, CSeq_id_Mapper* mapper)
2321     : CSeq_id_Info(CSeq_id::e_General, mapper)
2322 {
2323     CRef<CSeq_id> seq_id(new CSeq_id);
2324     s_AssignDbtag(seq_id->SetGeneral(), dbid);
2325     m_Seq_id = move(seq_id);
2326 }
2327 
2328 
2329 inline
ParseCaseVariant(const CDbtag & dbtag) const2330 CSeq_id_Handle::TVariant CSeq_id_General_PlainInfo::ParseCaseVariant(const CDbtag& dbtag) const
2331 {
2332     const CDbtag& src = m_Seq_id->GetGeneral();
2333     if ( dbtag.GetTag().IsId() ) {
2334         return s_ParseCaseVariant(src.GetDb(), dbtag.GetDb()).first;
2335     }
2336     else {
2337         auto t1 = s_ParseCaseVariant(src.GetDb(), dbtag.GetDb());
2338         auto t2 = s_ParseCaseVariant(src.GetTag().GetStr(), dbtag.GetTag().GetStr(), t1.second);
2339         return t1.first | t2.first;
2340     }
2341 }
2342 
2343 
GetPackedSeqId(TPacked packed,TVariant variant) const2344 CConstRef<CSeq_id> CSeq_id_General_PlainInfo::GetPackedSeqId(TPacked packed, TVariant variant) const
2345 {
2346     if ( !variant ) {
2347         return m_Seq_id;
2348     }
2349     CRef<CSeq_id> id(new CSeq_id);
2350     CDbtag& dbtag = id->SetGeneral();
2351     s_AssignDbtag(dbtag, m_Seq_id->GetGeneral());
2352     if ( dbtag.GetTag().IsId() ) {
2353         s_RestoreCaseVariant(dbtag.SetDb(), variant);
2354     }
2355     else {
2356         variant = s_RestoreCaseVariant(dbtag.SetDb(), variant);
2357         s_RestoreCaseVariant(dbtag.SetTag().SetStr(), variant);
2358     }
2359     return id;
2360 }
2361 
2362 /////////////////////////////////////////////////////////////////////////////
2363 // CSeq_id_General_Tree
2364 /////////////////////////////////////////////////////////////////////////////
2365 
2366 
CSeq_id_General_Tree(CSeq_id_Mapper * mapper)2367 CSeq_id_General_Tree::CSeq_id_General_Tree(CSeq_id_Mapper* mapper)
2368     : CSeq_id_Which_Tree(mapper)
2369 {
2370 }
2371 
2372 
~CSeq_id_General_Tree(void)2373 CSeq_id_General_Tree::~CSeq_id_General_Tree(void)
2374 {
2375 }
2376 
2377 
Empty(void) const2378 bool CSeq_id_General_Tree::Empty(void) const
2379 {
2380     return m_DbMap.empty() && m_PackedIdMap.empty() && m_PackedStrMap.empty();
2381 }
2382 
2383 
x_FindInfo(const CDbtag & dbid) const2384 CSeq_id_General_PlainInfo* CSeq_id_General_Tree::x_FindInfo(const CDbtag& dbid) const
2385 {
2386     TDbMap::const_iterator db = m_DbMap.find(dbid.GetDb());
2387     if (db == m_DbMap.end())
2388         return 0;
2389     const STagMap& tm = db->second;
2390     const CObject_id& oid = dbid.GetTag();
2391     if ( oid.IsStr() ) {
2392         STagMap::TByStr::const_iterator it = tm.m_ByStr.find(oid.GetStr());
2393         if (it != tm.m_ByStr.end()) {
2394             return it->second;
2395         }
2396     }
2397     else if ( oid.IsId() ) {
2398         STagMap::TById::const_iterator it = tm.m_ById.find(oid.GetId());
2399         if (it != tm.m_ById.end()) {
2400             return it->second;
2401         }
2402     }
2403     // Not found
2404     return 0;
2405 }
2406 
2407 
// Minimal number of digits a string tag must have to be stored packed:
// FindInfo() and FindOrCreate() fall back to the plain (unpacked) index
// when the parsed key has fewer digits than this.
static const size_t kMinGeneralStrDigits = 3;
2409 
2410 
// Find the handle of an already indexed general Seq-id.
//
// When packing is enabled (s_PackGeneralEnabled()):
//  - string tags are parsed into a key; keys with fewer than
//    kMinGeneralStrDigits digits fall through to the plain lookup below,
//    others are looked up in m_PackedStrMap only;
//  - integer tags are looked up in m_PackedIdMap by db name only;
//  - any other tag kind yields a null handle.
// Otherwise (or after the fall-through) the plain index is searched.
// All map accesses are made under the tree read lock.
CSeq_id_Handle CSeq_id_General_Tree::FindInfo(const CSeq_id& id) const
{
    _ASSERT( id.IsGeneral() );
    const CDbtag& dbid = id.GetGeneral();
    if ( s_PackGeneralEnabled() ) {
        switch ( dbid.GetTag().Which() ) {
        case CObject_id::e_Str:
        {
            TPackedStrKey key = CSeq_id_General_Str_Info::Parse(dbid);
            if ( key.GetStrDigits() < kMinGeneralStrDigits ) {
                // not packable: leave the switch and use the plain index
                break;
            }
            // key parsing and packing are done before taking the lock
            TPacked packed = CSeq_id_General_Str_Info::Pack(key, dbid);
            TReadLockGuard guard(m_TreeLock);
            TPackedStrMap::const_iterator it = m_PackedStrMap.find(key);
            if ( it != m_PackedStrMap.end() ) {
                return CSeq_id_Handle(it->second, packed, it->first.ParseCaseVariant(dbid));
            }
            return null;
        }
        case CObject_id::e_Id:
        {
            const string& key = dbid.GetDb();
            TPacked packed = CSeq_id_General_Id_Info::Pack(key, dbid);
            TReadLockGuard guard(m_TreeLock);
            TPackedIdMap::const_iterator it = m_PackedIdMap.find(key);
            if ( it != m_PackedIdMap.end() ) {
                // case variant of the db name relative to the stored map key
                return CSeq_id_Handle(it->second, packed, s_ParseCaseVariant(it->first, dbid.GetDb()).first);
            }
            return null;
        }
        default:
            return null;
        }
    }
    // plain (unpacked) lookup
    TReadLockGuard guard(m_TreeLock);
    CSeq_id_General_PlainInfo* info = x_FindInfo(dbid);
    CSeq_id_Handle::TVariant variant = info? info->ParseCaseVariant(dbid): 0;
    return CSeq_id_Handle(info, 0, variant);
}
2451 
2452 
// Return the handle for a general Seq-id, indexing it first if necessary.
//
// When packing is enabled (s_PackGeneralEnabled()):
//  - string tags with at least kMinGeneralStrDigits digits are stored in
//    m_PackedStrMap, one info per parsed key;
//  - integer tags are stored in m_PackedIdMap, one info per db name;
//  - string tags with fewer digits and other tag kinds fall through to
//    the plain index.
// In the plain index one CSeq_id_General_PlainInfo is created per id.
// Newly created infos get case variant 0; for existing ones the variant
// of 'id' relative to the stored spelling is computed.
// All map accesses are made under the tree write lock.
// Throws CSeq_id_MapperException (eEmptyError) if the tag is neither a
// string nor an integer when a plain entry has to be created.
CSeq_id_Handle CSeq_id_General_Tree::FindOrCreate(const CSeq_id& id)
{
    _ASSERT( id.IsGeneral() );
    const CDbtag& dbid = id.GetGeneral();
    if ( s_PackGeneralEnabled() ) {
        switch ( dbid.GetTag().Which() ) {
        case CObject_id::e_Str:
        {
            TPackedStrKey key = CSeq_id_General_Str_Info::Parse(dbid);
            if ( key.GetStrDigits() < kMinGeneralStrDigits ) {
                // not packable: leave the switch and use the plain index
                break;
            }
            // key parsing and packing are done before taking the lock
            TPacked packed = CSeq_id_General_Str_Info::Pack(key, dbid);
            TWriteLockGuard guard(m_TreeLock);
            TPackedStrMap::iterator it = m_PackedStrMap.find(key);
            if ( it == m_PackedStrMap.end() ) {
                CConstRef<CSeq_id_General_Str_Info> info
                    (new CSeq_id_General_Str_Info(m_Mapper, key));
                m_PackedStrMap.insert(TPackedStrMap::value_type(key, info));
                // newly created ids have case variant bits all zeros
                return CSeq_id_Handle(info, packed, 0);
            }
            else {
                // determine case variant
                CSeq_id_Handle::TVariant variant = it->first.ParseCaseVariant(dbid);
                return CSeq_id_Handle(it->second, packed, variant);
            }
        }
        case CObject_id::e_Id:
        {
            const string& key = dbid.GetDb();
            TPacked packed = CSeq_id_General_Id_Info::Pack(key, dbid);
            TWriteLockGuard guard(m_TreeLock);
            // lower_bound() result doubles as the insertion hint
            TPackedIdMap::iterator it = m_PackedIdMap.lower_bound(key);
            CSeq_id_Handle::TVariant variant = 0;
            if ( it == m_PackedIdMap.end() ||
                 !NStr::EqualNocase(it->first, key) ) {
                // no entry for this db name yet
                CConstRef<CSeq_id_General_Id_Info> info
                    (new CSeq_id_General_Id_Info(m_Mapper, key));
                it = m_PackedIdMap.insert
                    (it, TPackedIdMap::value_type(key, info));
            }
            else {
                variant = s_ParseCaseVariant(it->first, dbid.GetDb()).first;
            }
            return CSeq_id_Handle(it->second, packed, variant);
        }
        default:
            break;
        }
    }
    // plain (unpacked) index
    TWriteLockGuard guard(m_TreeLock);
    CSeq_id_General_PlainInfo* info = x_FindInfo(dbid);
    CSeq_id_Handle::TVariant variant = 0;
    if ( !info ) {
        info = new CSeq_id_General_PlainInfo(dbid, m_Mapper);
        STagMap& tm = m_DbMap[dbid.GetDb()];
        const CObject_id& oid = dbid.GetTag();
        if ( oid.IsStr() ) {
            //LOG_POST("CSeq_id_General_Tree::CreateStr("<<oid.GetStr()<<")");
            _VERIFY(tm.m_ByStr.insert
                    (STagMap::TByStr::value_type(oid.GetStr(), info)).second);
        }
        else if ( oid.IsId() ) {
            //LOG_POST("CSeq_id_General_Tree::CreateStr("<<oid.GetId()<<")");
            _VERIFY(tm.m_ById.insert(STagMap::TById::value_type(oid.GetId(),
                                                                info)).second);
        }
        else {
            // NOTE(review): if this branch is reachable, 'info' and the
            // m_DbMap entry created above appear to be left behind when
            // the exception propagates - confirm whether the
            // CSeq_id_General_PlainInfo constructor can succeed for an
            // unset tag.
            NCBI_THROW(CSeq_id_MapperException, eEmptyError,
                       "Can not create index for an empty db-tag");
        }
    }
    else {
        variant = info->ParseCaseVariant(dbid);
    }
    return CSeq_id_Handle(info, 0, variant);
}
2531 
2532 
x_Unindex(const CSeq_id_Info * info)2533 void CSeq_id_General_Tree::x_Unindex(const CSeq_id_Info* info)
2534 {
2535     if ( !m_PackedStrMap.empty() ) {
2536         const CSeq_id_General_Str_Info* sinfo =
2537             dynamic_cast<const CSeq_id_General_Str_Info*>(info);
2538         if ( sinfo ) {
2539             m_PackedStrMap.erase(sinfo->GetKey());
2540             return;
2541         }
2542     }
2543     if ( !m_PackedIdMap.empty() ) {
2544         const CSeq_id_General_Id_Info* sinfo =
2545             dynamic_cast<const CSeq_id_General_Id_Info*>(info);
2546         if ( sinfo ) {
2547             m_PackedIdMap.erase(sinfo->GetKey());
2548             return;
2549         }
2550     }
2551 
2552     CConstRef<CSeq_id> id = info->GetSeqId();
2553     _ASSERT( id->IsGeneral() );
2554     const CDbtag& dbid = id->GetGeneral();
2555 
2556     TDbMap::iterator db_it = m_DbMap.find(dbid.GetDb());
2557     _ASSERT(db_it != m_DbMap.end());
2558     STagMap& tm = db_it->second;
2559     const CObject_id& oid = dbid.GetTag();
2560     if ( oid.IsStr() ) {
2561         _VERIFY(tm.m_ByStr.erase(oid.GetStr()));
2562     }
2563     else if ( oid.IsId() ) {
2564         _VERIFY(tm.m_ById.erase(oid.GetId()));
2565     }
2566     if (tm.m_ByStr.empty()  &&  tm.m_ById.empty())
2567         m_DbMap.erase(db_it);
2568 }
2569 
2570 
HaveMatch(const CSeq_id_Handle & id) const2571 bool CSeq_id_General_Tree::HaveMatch(const CSeq_id_Handle& id) const
2572 {
2573     // match id <-> str(number)
2574     if ( !m_PackedStrMap.empty() ) {
2575         const CSeq_id_General_Str_Info* sinfo =
2576             dynamic_cast<const CSeq_id_General_Str_Info*>(id.x_GetInfo());
2577         if ( sinfo ) {
2578             // string with non-digital prefix or suffix
2579             // cannot be converted to numeric id
2580             if ( !sinfo->GetStrSuffix().empty() ||
2581                  !sx_AllDigits(sinfo->GetStrPrefix()) ) {
2582                 return false;
2583             }
2584         }
2585     }
2586     return true;
2587 }
2588 
2589 
FindMatch(const CSeq_id_Handle & id,TSeq_id_MatchList & id_list) const2590 void CSeq_id_General_Tree::FindMatch(const CSeq_id_Handle& id,
2591                                      TSeq_id_MatchList& id_list) const
2592 {
2593     id_list.insert(id);
2594     // match id <-> str(number)
2595     if ( !m_PackedStrMap.empty() ) {
2596         const CSeq_id_General_Str_Info* sinfo =
2597             dynamic_cast<const CSeq_id_General_Str_Info*>(id.x_GetInfo());
2598         if ( sinfo ) {
2599             // string with non-digital prefix or suffix
2600             // cannot be converted to numeric id
2601             if ( !sinfo->GetStrSuffix().empty() ||
2602                  !sx_AllDigits(sinfo->GetStrPrefix()) ) {
2603                 return;
2604             }
2605         }
2606     }
2607     CConstRef<CSeq_id> seq_id = id.GetSeqId();
2608     const CDbtag& dbtag = seq_id->GetGeneral();
2609     const CObject_id& obj_id = dbtag.GetTag();
2610     if ( obj_id.IsId() ) {
2611         int n = obj_id.GetId();
2612         if ( n >= 0 ) {
2613             CSeq_id seq_id2;
2614             CDbtag& dbtag2 = seq_id2.SetGeneral();
2615             dbtag2.SetDb(dbtag.GetDb());
2616             dbtag2.SetTag().SetStr(NStr::IntToString(n));
2617             CSeq_id_Handle id2 = FindInfo(seq_id2);
2618             if ( id2 ) {
2619                 id_list.insert(id2);
2620             }
2621         }
2622     }
2623     else {
2624         const string& s = obj_id.GetStr();
2625         int n = NStr::StringToNonNegativeInt(s);
2626         if ( n >= 0 && NStr::IntToString(n) == s ) {
2627             CSeq_id seq_id2;
2628             CDbtag& dbtag2 = seq_id2.SetGeneral();
2629             dbtag2.SetDb(dbtag.GetDb());
2630             dbtag2.SetTag().SetId(n);
2631             CSeq_id_Handle id2 = FindInfo(seq_id2);
2632             if ( id2 ) {
2633                 id_list.insert(id2);
2634             }
2635         }
2636     }
2637 }
2638 
2639 
FindMatchStr(const string & sid,TSeq_id_MatchList & id_list) const2640 void CSeq_id_General_Tree::FindMatchStr(const string& sid,
2641                                         TSeq_id_MatchList& id_list) const
2642 {
2643     TPacked value;
2644     bool ok;
2645     try {
2646         value = NStr::StringToNumeric<TPacked>(sid);
2647         ok = true;
2648     }
2649     catch (const CStringException&) {
2650         // Not an integer value
2651         value = -1;
2652         ok = false;
2653     }
2654     TReadLockGuard guard(m_TreeLock);
2655     ITERATE(TDbMap, db_it, m_DbMap) {
2656         // In any case search in strings
2657         STagMap::TByStr::const_iterator str_it =
2658             db_it->second.m_ByStr.find(sid);
2659         if (str_it != db_it->second.m_ByStr.end()) {
2660             id_list.insert(CSeq_id_Handle(str_it->second));
2661         }
2662         if ( ok ) {
2663             STagMap::TById::const_iterator int_it =
2664                 db_it->second.m_ById.find(value);
2665             if (int_it != db_it->second.m_ById.end()) {
2666                 id_list.insert(CSeq_id_Handle(int_it->second));
2667             }
2668         }
2669     }
2670 }
2671 
2672 
Dump(CNcbiOstream & out,CSeq_id::E_Choice type,int details) const2673 size_t CSeq_id_General_Tree::Dump(CNcbiOstream& out,
2674                                   CSeq_id::E_Choice type,
2675                                   int details) const
2676 {
2677     size_t total_bytes = 0;
2678     if ( details >= CSeq_id_Mapper::eDumpStatistics ) {
2679         out << "CSeq_id_Handles("<<CSeq_id::SelectionName(type)<<"): "<<endl;
2680     }
2681     {{ // m_DbMap
2682         size_t count = 0, bytes = 0;
2683         ITERATE ( TDbMap, it, m_DbMap ) {
2684             bytes += sizeof(string)+sizeof(STagMap); // map value
2685             bytes += sizeof(int)+3*sizeof(void*); // red/black tree
2686             // malloc overhead:
2687             // map value
2688             bytes += 1*kMallocOverhead;
2689             bytes += sx_StringMemory(it->first);
2690             ITERATE ( STagMap::TById, it2, it->second.m_ById ) {
2691                 count += 1;
2692                 bytes += sizeof(it2->first)+sizeof(it2->second); // map
2693                 bytes += sizeof(int)+3*sizeof(void*); // red/black tree
2694                 bytes += sizeof(CSeq_id_Info);
2695                 bytes += sizeof(CSeq_id);
2696                 bytes += sizeof(CObject_id);
2697                 bytes += 4*kMallocOverhead;
2698             }
2699             ITERATE ( STagMap::TByStr, it2, it->second.m_ByStr ) {
2700                 count += 1;
2701                 bytes += sizeof(it2->first)+sizeof(it2->second); // map
2702                 bytes += sizeof(int)+3*sizeof(void*); // red/black tree
2703                 bytes += sizeof(CSeq_id_Info);
2704                 bytes += sizeof(CSeq_id);
2705                 bytes += sizeof(CObject_id);
2706                 bytes += 4*kMallocOverhead;
2707                 bytes += sx_StringMemory(it2->first);
2708             }
2709         }
2710         total_bytes += bytes;
2711         if ( details >= CSeq_id_Mapper::eDumpStatistics ) {
2712             out << " "<<count << " handles, "<<bytes<<" bytes" << endl;
2713         }
2714     }}
2715     {{ // m_PackedIdMap
2716         size_t count = m_PackedIdMap.size(), elem_size = 0, extra_size = 0;
2717         if ( count ) {
2718             elem_size = sizeof(TPackedIdKey)+sizeof(void*);
2719             elem_size += sizeof(int)+3*sizeof(void*); // red/black tree
2720             elem_size += sizeof(CSeq_id_General_Id_Info); //
2721             // malloc overhead:
2722             // map value, CSeq_id_General_Id_Info
2723             elem_size += 2*kMallocOverhead;
2724             ITERATE ( TPackedIdMap, it, m_PackedIdMap ) {
2725                 extra_size += sx_StringMemory(it->first);
2726             }
2727         }
2728         size_t bytes = extra_size + count*elem_size;
2729         total_bytes += bytes;
2730         if ( details >= CSeq_id_Mapper::eDumpStatistics ) {
2731             out << " "<<count << " packed int handles, "<<bytes<<" bytes" << endl;
2732         }
2733     }}
2734     {{ // m_PackedStrMap
2735         size_t count = m_PackedStrMap.size(), elem_size = 0, extra_size = 0;
2736         if ( count ) {
2737             elem_size = sizeof(TPackedIdKey)+sizeof(void*);
2738             elem_size += sizeof(int)+3*sizeof(void*); // red/black tree
2739             elem_size += sizeof(CSeq_id_General_Str_Info); //
2740             // malloc overhead:
2741             // map value, CSeq_id_General_Id_Info
2742             elem_size += 2*kMallocOverhead;
2743             ITERATE ( TPackedStrMap, it, m_PackedStrMap ) {
2744                 extra_size += sx_StringMemory(it->first.m_Db);
2745                 extra_size += sx_StringMemory(it->first.m_StrPrefix);
2746                 extra_size += sx_StringMemory(it->first.m_StrSuffix);
2747             }
2748         }
2749         size_t bytes = extra_size + count*elem_size;
2750         total_bytes += bytes;
2751         if ( details >= CSeq_id_Mapper::eDumpStatistics ) {
2752             out << " "<<count << " packed str handles, "<<bytes<<" bytes" << endl;
2753         }
2754     }}
2755     if ( details >= CSeq_id_Mapper::eDumpAllIds ) {
2756         ITERATE ( TDbMap, it, m_DbMap ) {
2757             ITERATE ( STagMap::TByStr, it2, it->second.m_ByStr ) {
2758                 out << "  "<<it2->second->GetSeqId()->AsFastaString() << endl;
2759             }
2760             ITERATE ( STagMap::TByStr, it2, it->second.m_ByStr ) {
2761                 out << "  "<<it2->second->GetSeqId()->AsFastaString() << endl;
2762             }
2763         }
2764         ITERATE ( TPackedIdMap, it, m_PackedIdMap ) {
2765             out << "  packed int "<<it->first << endl;
2766         }
2767         ITERATE ( TPackedStrMap, it, m_PackedStrMap ) {
2768             out << "  packed str "<<it->first.m_Key<<"/"<<it->first.m_Db<<"/"
2769                 <<it->first.m_StrPrefix<<"/"<<it->first.m_StrSuffix << endl;
2770         }
2771     }
2772     return total_bytes;
2773 }
2774 
2775 /////////////////////////////////////////////////////////////////////////////
2776 // CSeq_id_Giim_Tree
2777 /////////////////////////////////////////////////////////////////////////////
2778 
2779 
CSeq_id_Giim_Tree(CSeq_id_Mapper * mapper)2780 CSeq_id_Giim_Tree::CSeq_id_Giim_Tree(CSeq_id_Mapper* mapper)
2781     : CSeq_id_Which_Tree(mapper)
2782 {
2783 }
2784 
2785 
~CSeq_id_Giim_Tree(void)2786 CSeq_id_Giim_Tree::~CSeq_id_Giim_Tree(void)
2787 {
2788 }
2789 
2790 
Empty(void) const2791 bool CSeq_id_Giim_Tree::Empty(void) const
2792 {
2793     return m_IdMap.empty();
2794 }
2795 
2796 
x_FindInfo(const CGiimport_id & gid) const2797 CSeq_id_Info* CSeq_id_Giim_Tree::x_FindInfo(const CGiimport_id& gid) const
2798 {
2799     TIdMap::const_iterator id_it = m_IdMap.find(gid.GetId());
2800     if (id_it == m_IdMap.end())
2801         return 0;
2802     ITERATE (TGiimList, dbr_it, id_it->second) {
2803         CConstRef<CSeq_id> id = (*dbr_it)->GetSeqId();
2804         const CGiimport_id& gid2 = id->GetGiim();
2805         // Both Db and Release must be equal
2806         if ( !gid.Equals(gid2) ) {
2807             return *dbr_it;
2808         }
2809     }
2810     // Not found
2811     return 0;
2812 }
2813 
2814 
FindInfo(const CSeq_id & id) const2815 CSeq_id_Handle CSeq_id_Giim_Tree::FindInfo(const CSeq_id& id) const
2816 {
2817     _ASSERT( id.IsGiim() );
2818     const CGiimport_id& gid = id.GetGiim();
2819     TReadLockGuard guard(m_TreeLock);
2820     return CSeq_id_Handle(x_FindInfo(gid));
2821 }
2822 
2823 
FindOrCreate(const CSeq_id & id)2824 CSeq_id_Handle CSeq_id_Giim_Tree::FindOrCreate(const CSeq_id& id)
2825 {
2826     _ASSERT( id.IsGiim() );
2827     const CGiimport_id& gid = id.GetGiim();
2828     TWriteLockGuard guard(m_TreeLock);
2829     CSeq_id_Info* info = x_FindInfo(gid);
2830     if ( !info ) {
2831         info = CreateInfo(id);
2832         m_IdMap[gid.GetId()].push_back(info);
2833     }
2834     return CSeq_id_Handle(info);
2835 }
2836 
2837 
x_Unindex(const CSeq_id_Info * info)2838 void CSeq_id_Giim_Tree::x_Unindex(const CSeq_id_Info* info)
2839 {
2840     CConstRef<CSeq_id> id = info->GetSeqId();
2841     _ASSERT( id->IsGiim() );
2842     const CGiimport_id& gid = id->GetGiim();
2843 
2844     TIdMap::iterator id_it = m_IdMap.find(gid.GetId());
2845     _ASSERT(id_it != m_IdMap.end());
2846     TGiimList& giims = id_it->second;
2847     NON_CONST_ITERATE(TGiimList, dbr_it, giims) {
2848         if (*dbr_it == info) {
2849             giims.erase(dbr_it);
2850             break;
2851         }
2852     }
2853     if ( giims.empty() )
2854         m_IdMap.erase(id_it);
2855 }
2856 
2857 
FindMatchStr(const string & sid,TSeq_id_MatchList & id_list) const2858 void CSeq_id_Giim_Tree::FindMatchStr(const string& sid,
2859                                      TSeq_id_MatchList& id_list) const
2860 {
2861     TReadLockGuard guard(m_TreeLock);
2862     try {
2863         TPacked value = NStr::StringToNumeric<TPacked>(sid);
2864         TIdMap::const_iterator it = m_IdMap.find(value);
2865         if (it == m_IdMap.end())
2866             return;
2867         ITERATE(TGiimList, git, it->second) {
2868             id_list.insert(CSeq_id_Handle(*git));
2869         }
2870     }
2871     catch (CStringException) {
2872         // Not an integer value
2873         return;
2874     }
2875 }
2876 
2877 
Dump(CNcbiOstream & out,CSeq_id::E_Choice type,int details) const2878 size_t CSeq_id_Giim_Tree::Dump(CNcbiOstream& out,
2879                                CSeq_id::E_Choice type,
2880                                int details) const
2881 {
2882     size_t total_bytes = 0;
2883     if ( details >= CSeq_id_Mapper::eDumpStatistics ) {
2884         out << "CSeq_id_Handles("<<CSeq_id::SelectionName(type)<<"): ";
2885     }
2886     size_t count = 0, bytes = 0;
2887     ITERATE ( TIdMap, it, m_IdMap ) {
2888         bytes += sizeof(it->first) + sizeof(it->second);
2889         bytes += sizeof(int)+3*sizeof(void*); // red/black tree
2890         // malloc overhead:
2891         // map value, vector
2892         bytes += 2*kMallocOverhead;
2893         size_t size2 = it->second.size();
2894         count += size2;
2895         bytes += it->second.capacity()*sizeof(void*);
2896         bytes += size2*sizeof(CSeq_id_Info);
2897         bytes += size2*sizeof(CSeq_id);
2898         bytes += size2*sizeof(CGiimport_id);
2899         ITERATE ( TGiimList, it2, it->second ) {
2900             const CGiimport_id& id = (*it2)->GetSeqId()->GetGiim();
2901             if ( id.IsSetDb() ) {
2902                 bytes += sx_StringMemory(id.GetDb());
2903             }
2904             if ( id.IsSetRelease() ) {
2905                 bytes += sx_StringMemory(id.GetRelease());
2906             }
2907         }
2908     }
2909     total_bytes += bytes;
2910     if ( details >= CSeq_id_Mapper::eDumpStatistics ) {
2911         out << count << " handles, "<<bytes<<" bytes" << endl;
2912     }
2913     if ( details >= CSeq_id_Mapper::eDumpAllIds ) {
2914         ITERATE ( TIdMap, it, m_IdMap ) {
2915             ITERATE ( TGiimList, it2, it->second ) {
2916                 out << "  "<<(*it2)->GetSeqId()->AsFastaString() << endl;
2917             }
2918         }
2919     }
2920     return total_bytes;
2921 }
2922 
2923 /////////////////////////////////////////////////////////////////////////////
2924 // CSeq_id_Patent_Tree
2925 /////////////////////////////////////////////////////////////////////////////
2926 
2927 
CSeq_id_Patent_Tree(CSeq_id_Mapper * mapper)2928 CSeq_id_Patent_Tree::CSeq_id_Patent_Tree(CSeq_id_Mapper* mapper)
2929     : CSeq_id_Which_Tree(mapper)
2930 {
2931 }
2932 
2933 
~CSeq_id_Patent_Tree(void)2934 CSeq_id_Patent_Tree::~CSeq_id_Patent_Tree(void)
2935 {
2936 }
2937 
2938 
Empty(void) const2939 bool CSeq_id_Patent_Tree::Empty(void) const
2940 {
2941     return m_CountryMap.empty();
2942 }
2943 
2944 
x_FindInfo(const CPatent_seq_id & pid) const2945 CSeq_id_Info* CSeq_id_Patent_Tree::x_FindInfo(const CPatent_seq_id& pid) const
2946 {
2947     const CId_pat& cit = pid.GetCit();
2948     TByCountry::const_iterator cntry_it = m_CountryMap.find(cit.GetCountry());
2949     if (cntry_it == m_CountryMap.end())
2950         return 0;
2951 
2952     const string* number;
2953     const SPat_idMap::TByNumber* by_number;
2954     if ( cit.GetId().IsNumber() ) {
2955         number = &cit.GetId().GetNumber();
2956         by_number = &cntry_it->second.m_ByNumber;
2957     }
2958     else if ( cit.GetId().IsApp_number() ) {
2959         number = &cit.GetId().GetApp_number();
2960         by_number = &cntry_it->second.m_ByApp_number;
2961     }
2962     else {
2963         return 0;
2964     }
2965 
2966     SPat_idMap::TByNumber::const_iterator num_it = by_number->find(*number);
2967     if (num_it == by_number->end())
2968         return 0;
2969     SPat_idMap::TBySeqid::const_iterator seqid_it =
2970         num_it->second.find(pid.GetSeqid());
2971     if (seqid_it != num_it->second.end()) {
2972         return seqid_it->second;
2973     }
2974     // Not found
2975     return 0;
2976 }
2977 
2978 
FindInfo(const CSeq_id & id) const2979 CSeq_id_Handle CSeq_id_Patent_Tree::FindInfo(const CSeq_id& id) const
2980 {
2981     _ASSERT( id.IsPatent() );
2982     const CPatent_seq_id& pid = id.GetPatent();
2983     TReadLockGuard guard(m_TreeLock);
2984     return CSeq_id_Handle(x_FindInfo(pid));
2985 }
2986 
FindOrCreate(const CSeq_id & id)2987 CSeq_id_Handle CSeq_id_Patent_Tree::FindOrCreate(const CSeq_id& id)
2988 {
2989     _ASSERT( id.IsPatent() );
2990     const CPatent_seq_id& pid = id.GetPatent();
2991     TWriteLockGuard guard(m_TreeLock);
2992     CSeq_id_Info* info = x_FindInfo(pid);
2993     if ( !info ) {
2994         const CId_pat& cit = pid.GetCit();
2995         SPat_idMap& country = m_CountryMap[cit.GetCountry()];
2996         if ( cit.GetId().IsNumber() ) {
2997             SPat_idMap::TBySeqid& num =
2998                 country.m_ByNumber[cit.GetId().GetNumber()];
2999             _ASSERT(num.find(pid.GetSeqid()) == num.end());
3000             info = CreateInfo(id);
3001             num[pid.GetSeqid()] = info;
3002         }
3003         else if ( cit.GetId().IsApp_number() ) {
3004             SPat_idMap::TBySeqid& app = country.m_ByApp_number[
3005                 cit.GetId().GetApp_number()];
3006             _ASSERT(app.find(pid.GetSeqid()) == app.end());
3007             info = CreateInfo(id);
3008             app[pid.GetSeqid()] = info;
3009         }
3010         else {
3011             // Can not index empty patent number
3012             NCBI_THROW(CSeq_id_MapperException, eEmptyError,
3013                        "Cannot index empty patent number");
3014         }
3015     }
3016     return CSeq_id_Handle(info);
3017 }
3018 
3019 
x_Unindex(const CSeq_id_Info * info)3020 void CSeq_id_Patent_Tree::x_Unindex(const CSeq_id_Info* info)
3021 {
3022     CConstRef<CSeq_id> id = info->GetSeqId();
3023     _ASSERT( id->IsPatent() );
3024     const CPatent_seq_id& pid = id->GetPatent();
3025 
3026     TByCountry::iterator country_it =
3027         m_CountryMap.find(pid.GetCit().GetCountry());
3028     _ASSERT(country_it != m_CountryMap.end());
3029     SPat_idMap& pats = country_it->second;
3030     if ( pid.GetCit().GetId().IsNumber() ) {
3031         SPat_idMap::TByNumber::iterator num_it =
3032             pats.m_ByNumber.find(pid.GetCit().GetId().GetNumber());
3033         _ASSERT(num_it != pats.m_ByNumber.end());
3034         SPat_idMap::TBySeqid::iterator seqid_it =
3035             num_it->second.find(pid.GetSeqid());
3036         _ASSERT(seqid_it != num_it->second.end());
3037         _ASSERT(seqid_it->second == info);
3038         num_it->second.erase(seqid_it);
3039         if ( num_it->second.empty() )
3040             pats.m_ByNumber.erase(num_it);
3041     }
3042     else if ( pid.GetCit().GetId().IsApp_number() ) {
3043         SPat_idMap::TByNumber::iterator app_it =
3044             pats.m_ByApp_number.find(pid.GetCit().GetId().GetApp_number());
3045         _ASSERT( app_it != pats.m_ByApp_number.end() );
3046         SPat_idMap::TBySeqid::iterator seqid_it =
3047             app_it->second.find(pid.GetSeqid());
3048         _ASSERT(seqid_it != app_it->second.end());
3049         _ASSERT(seqid_it->second == info);
3050         app_it->second.erase(seqid_it);
3051         if ( app_it->second.empty() )
3052             pats.m_ByApp_number.erase(app_it);
3053     }
3054     if (country_it->second.m_ByNumber.empty()  &&
3055         country_it->second.m_ByApp_number.empty())
3056         m_CountryMap.erase(country_it);
3057 }
3058 
3059 
FindMatchStr(const string & sid,TSeq_id_MatchList & id_list) const3060 void CSeq_id_Patent_Tree::FindMatchStr(const string& sid,
3061                                        TSeq_id_MatchList& id_list) const
3062 {
3063     TReadLockGuard guard(m_TreeLock);
3064     ITERATE (TByCountry, cit, m_CountryMap) {
3065         SPat_idMap::TByNumber::const_iterator nit =
3066             cit->second.m_ByNumber.find(sid);
3067         if (nit != cit->second.m_ByNumber.end()) {
3068             ITERATE(SPat_idMap::TBySeqid, iit, nit->second) {
3069                 id_list.insert(CSeq_id_Handle(iit->second));
3070             }
3071         }
3072         SPat_idMap::TByNumber::const_iterator ait =
3073             cit->second.m_ByApp_number.find(sid);
3074         if (ait != cit->second.m_ByApp_number.end()) {
3075             ITERATE(SPat_idMap::TBySeqid, iit, nit->second) {
3076                 id_list.insert(CSeq_id_Handle(iit->second));
3077             }
3078         }
3079     }
3080 }
3081 
3082 
Dump(CNcbiOstream & out,CSeq_id::E_Choice type,int details) const3083 size_t CSeq_id_Patent_Tree::Dump(CNcbiOstream& out,
3084                                  CSeq_id::E_Choice type,
3085                                  int details) const
3086 {
3087     size_t total_bytes = 0;
3088     if ( details >= CSeq_id_Mapper::eDumpStatistics ) {
3089         out << "CSeq_id_Handles("<<CSeq_id::SelectionName(type)<<"): ";
3090     }
3091     size_t count = 0, bytes = 0;
3092     ITERATE ( TByCountry, it, m_CountryMap ) {
3093         bytes += sizeof(it->first) + sizeof(it->second);
3094         bytes += sizeof(int)+3*sizeof(void*); // red/black tree
3095         // malloc overhead:
3096         // map value, vector
3097         bytes += 1*kMallocOverhead;
3098         bytes += sx_StringMemory(it->first);
3099         ITERATE ( SPat_idMap::TByNumber, it2, it->second.m_ByNumber ) {
3100             bytes += sizeof(it2->first) + sizeof(it2->second);
3101             bytes += sizeof(int)+3*sizeof(void*); // red/black tree
3102             // malloc overhead:
3103             // map value, vector
3104             bytes += 1*kMallocOverhead;
3105             bytes += sx_StringMemory(it2->first);
3106             ITERATE ( SPat_idMap::TBySeqid, it3, it2->second ) {
3107                 count += 1;
3108                 bytes += sizeof(it2->first) + sizeof(it2->second);
3109                 bytes += sizeof(int)+3*sizeof(void*); // red/black tree
3110                 bytes += sizeof(CSeq_id_Info);
3111                 bytes += sizeof(CSeq_id);
3112                 bytes += sizeof(CPatent_seq_id);
3113                 bytes += sizeof(CId_pat);
3114                 // malloc overhead:
3115                 // map value,
3116                 bytes += 5*kMallocOverhead;
3117             }
3118         }
3119     }
3120     total_bytes += bytes;
3121     if ( details >= CSeq_id_Mapper::eDumpStatistics ) {
3122         out << count << " handles, "<<bytes<<" bytes" << endl;
3123     }
3124     if ( details >= CSeq_id_Mapper::eDumpAllIds ) {
3125         ITERATE ( TByCountry, it, m_CountryMap ) {
3126             ITERATE ( SPat_idMap::TByNumber, it2, it->second.m_ByNumber ) {
3127                 ITERATE ( SPat_idMap::TBySeqid, it3, it2->second ) {
3128                     out << "  "<<it3->second->GetSeqId()->AsFastaString() << endl;
3129                 }
3130             }
3131         }
3132     }
3133     return total_bytes;
3134 }
3135 
3136 /////////////////////////////////////////////////////////////////////////////
3137 // CSeq_id_PDB_Tree
3138 /////////////////////////////////////////////////////////////////////////////
3139 
3140 
CSeq_id_PDB_Tree(CSeq_id_Mapper * mapper)3141 CSeq_id_PDB_Tree::CSeq_id_PDB_Tree(CSeq_id_Mapper* mapper)
3142     : CSeq_id_Which_Tree(mapper)
3143 {
3144 }
3145 
3146 
~CSeq_id_PDB_Tree(void)3147 CSeq_id_PDB_Tree::~CSeq_id_PDB_Tree(void)
3148 {
3149 }
3150 
3151 
Empty(void) const3152 bool CSeq_id_PDB_Tree::Empty(void) const
3153 {
3154     return m_MolMap.empty();
3155 }
3156 
3157 
x_IdToStrKey(const CPDB_seq_id & id) const3158 inline string CSeq_id_PDB_Tree::x_IdToStrKey(const CPDB_seq_id& id) const
3159 {
3160 // this is an attempt to follow the undocumented rules of PDB
3161 // ("documented" as code written elsewhere)
3162     string skey = id.GetMol().Get();
3163     if (id.IsSetChain_id()) {
3164         skey += '_';
3165         skey += id.GetChain_id();
3166     }
3167     else if (id.IsSetChain()) {
3168         skey += '_';
3169         skey += char(id.GetChain());
3170     }
3171     return skey;
3172 }
3173 
3174 
FindInfo(const CSeq_id & id) const3175 CSeq_id_Handle CSeq_id_PDB_Tree::FindInfo(const CSeq_id& id) const
3176 {
3177     _ASSERT( id.IsPdb() );
3178     const CPDB_seq_id& pid = id.GetPdb();
3179     TReadLockGuard guard(m_TreeLock);
3180     TMolMap::const_iterator mol_it = m_MolMap.find(x_IdToStrKey(pid));
3181     if ( mol_it != m_MolMap.end() ) {
3182         ITERATE( TSubMolList, it, mol_it->second ) {
3183             if ( pid.Equals((*it)->GetSeqId()->GetPdb()) ) {
3184                 return CSeq_id_Handle(*it);
3185             }
3186         }
3187     }
3188     return CSeq_id_Handle();
3189 }
3190 
3191 
FindOrCreate(const CSeq_id & id)3192 CSeq_id_Handle CSeq_id_PDB_Tree::FindOrCreate(const CSeq_id& id)
3193 {
3194     _ASSERT( id.IsPdb() );
3195     const CPDB_seq_id& pid = id.GetPdb();
3196     TWriteLockGuard guard(m_TreeLock);
3197     TSubMolList& sub = m_MolMap[x_IdToStrKey(id.GetPdb())];
3198     ITERATE ( TSubMolList, it, sub ) {
3199         if ( pid.Equals((*it)->GetSeqId()->GetPdb()) ) {
3200             return CSeq_id_Handle(*it);
3201         }
3202     }
3203     CSeq_id_Info* info = CreateInfo(id);
3204     sub.push_back(info);
3205     return CSeq_id_Handle(info);
3206 }
3207 
3208 
x_Unindex(const CSeq_id_Info * info)3209 void CSeq_id_PDB_Tree::x_Unindex(const CSeq_id_Info* info)
3210 {
3211     CConstRef<CSeq_id> id = info->GetSeqId();
3212     _ASSERT( id->IsPdb() );
3213     const CPDB_seq_id& pid = id->GetPdb();
3214 
3215     TMolMap::iterator mol_it = m_MolMap.find(x_IdToStrKey(pid));
3216     _ASSERT(mol_it != m_MolMap.end());
3217     NON_CONST_ITERATE(TSubMolList, it, mol_it->second) {
3218         if (*it == info) {
3219             _ASSERT(pid.Equals((*it)->GetSeqId()->GetPdb()));
3220             mol_it->second.erase(it);
3221             break;
3222         }
3223     }
3224     if ( mol_it->second.empty() )
3225         m_MolMap.erase(mol_it);
3226 }
3227 
3228 
HaveMatch(const CSeq_id_Handle &) const3229 bool CSeq_id_PDB_Tree::HaveMatch(const CSeq_id_Handle& ) const
3230 {
3231     return true;
3232 }
3233 
3234 
FindMatch(const CSeq_id_Handle & id,TSeq_id_MatchList & id_list) const3235 void CSeq_id_PDB_Tree::FindMatch(const CSeq_id_Handle& id,
3236                                  TSeq_id_MatchList& id_list) const
3237 {
3238     //_ASSERT(id && id == FindInfo(id.GetSeqId()));
3239     CConstRef<CSeq_id> seq_id = id.GetSeqId();
3240     const CPDB_seq_id& pid = seq_id->GetPdb();
3241     TReadLockGuard guard(m_TreeLock);
3242     TMolMap::const_iterator mol_it = m_MolMap.find(x_IdToStrKey(pid));
3243     if (mol_it == m_MolMap.end())
3244         return;
3245     ITERATE(TSubMolList, it, mol_it->second) {
3246         const CPDB_seq_id& pid2 = (*it)->GetSeqId()->GetPdb();
3247         // Ignore date if not set in id
3248         if ( pid.IsSetRel() ) {
3249             if ( !pid2.IsSetRel()  ||
3250                 !pid.GetRel().Equals(pid2.GetRel()) )
3251                 continue;
3252         }
3253         id_list.insert(CSeq_id_Handle(*it));
3254     }
3255 }
3256 
3257 
FindMatchStr(const string & sid,TSeq_id_MatchList & id_list) const3258 void CSeq_id_PDB_Tree::FindMatchStr(const string& sid,
3259                                     TSeq_id_MatchList& id_list) const
3260 {
3261     TReadLockGuard guard(m_TreeLock);
3262     TMolMap::const_iterator mit = m_MolMap.find(sid);
3263     if (mit == m_MolMap.end())
3264         return;
3265     ITERATE(TSubMolList, sub_it, mit->second) {
3266         id_list.insert(CSeq_id_Handle(*sub_it));
3267     }
3268 }
3269 
3270 
HaveReverseMatch(const CSeq_id_Handle &) const3271 bool CSeq_id_PDB_Tree::HaveReverseMatch(const CSeq_id_Handle& ) const
3272 {
3273     return true;
3274 }
3275 
3276 
FindReverseMatch(const CSeq_id_Handle & id,TSeq_id_MatchList & id_list)3277 void CSeq_id_PDB_Tree::FindReverseMatch(const CSeq_id_Handle& id,
3278                                         TSeq_id_MatchList& id_list)
3279 {
3280     //_ASSERT(id && id == FindInfo(id.GetSeqId()));
3281     id_list.insert(id);
3282     CConstRef<CSeq_id> seq_id = id.GetSeqId();
3283     const CPDB_seq_id& pid = seq_id->GetPdb();
3284     if ( !pid.IsSetRel() )
3285         return;
3286     // find id without release date
3287     TReadLockGuard guard(m_TreeLock);
3288     TMolMap::const_iterator mol_it = m_MolMap.find(x_IdToStrKey(pid));
3289     if (mol_it == m_MolMap.end())
3290         return;
3291     ITERATE(TSubMolList, it, mol_it->second) {
3292         const CPDB_seq_id& pid2 = (*it)->GetSeqId()->GetPdb();
3293         // Ignore date if set in id
3294         if ( pid2.IsSetRel() )
3295             continue;
3296         id_list.insert(CSeq_id_Handle(*it));
3297     }
3298 }
3299 
3300 
Dump(CNcbiOstream & out,CSeq_id::E_Choice type,int details) const3301 size_t CSeq_id_PDB_Tree::Dump(CNcbiOstream& out,
3302                               CSeq_id::E_Choice type,
3303                               int details) const
3304 {
3305     size_t total_bytes = 0;
3306     if ( details >= CSeq_id_Mapper::eDumpStatistics ) {
3307         out << "CSeq_id_Handles("<<CSeq_id::SelectionName(type)<<"): ";
3308     }
3309     size_t count = 0, bytes = 0;
3310     ITERATE ( TMolMap, it, m_MolMap ) {
3311         bytes += sizeof(it->first) + sizeof(it->second);
3312         bytes += sizeof(int)+3*sizeof(void*); // red/black tree
3313         // malloc overhead:
3314         // map value, vector
3315         bytes += 2*kMallocOverhead;
3316         bytes += sx_StringMemory(it->first);
3317         size_t size2 = it->second.size();
3318         count += size2;
3319         bytes += it->second.capacity()*sizeof(void*);
3320         bytes += size2*sizeof(CSeq_id_Info);
3321         bytes += size2*sizeof(CSeq_id);
3322         bytes += size2*sizeof(CPDB_seq_id);
3323         ITERATE ( TSubMolList, it2, it->second ) {
3324             const CPDB_seq_id& id = (*it2)->GetSeqId()->GetPdb();
3325             if ( id.IsSetRel() ) {
3326                 bytes += sizeof(CDate);
3327                 bytes += kMallocOverhead;
3328             }
3329         }
3330     }
3331     total_bytes += bytes;
3332     if ( details >= CSeq_id_Mapper::eDumpStatistics ) {
3333         out << count << " handles, "<<bytes<<" bytes" << endl;
3334     }
3335     if ( details >= CSeq_id_Mapper::eDumpAllIds ) {
3336         ITERATE ( TMolMap, it, m_MolMap ) {
3337             ITERATE ( TSubMolList, it2, it->second ) {
3338                 out << "  "<<(*it2)->GetSeqId()->AsFastaString() << endl;
3339             }
3340         }
3341     }
3342     return total_bytes;
3343 }
3344 
3345 
GetErrCodeString(void) const3346 const char* CSeq_id_MapperException::GetErrCodeString(void) const
3347 {
3348     switch ( GetErrCode() ) {
3349     case eTypeError:   return "eTypeError";
3350     case eSymbolError: return "eSymbolError";
3351     case eEmptyError:  return "eEmptyError";
3352     case eOtherError:  return "eOtherError";
3353     default:           return CException::GetErrCodeString();
3354     }
3355 }
3356 
3357 
3358 END_SCOPE(objects)
3359 END_NCBI_SCOPE
3360