1 /*  $Id: data_loader.cpp 610968 2020-06-26 12:55:17Z grichenk $
2 * ===========================================================================
3 *
4 *                            PUBLIC DOMAIN NOTICE
5 *               National Center for Biotechnology Information
6 *
7 *  This software/database is a "United States Government Work" under the
8 *  terms of the United States Copyright Act.  It was written as part of
9 *  the author's official duties as a United States Government employee and
10 *  thus cannot be copyrighted.  This software/database is freely available
11 *  to the public for use. The National Library of Medicine and the U.S.
12 *  Government have not placed any restriction on its use or reproduction.
13 *
14 *  Although all reasonable efforts have been taken to ensure the accuracy
15 *  and reliability of the software and data, the NLM and the U.S.
16 *  Government do not and cannot warrant the performance or results that
17 *  may be obtained by using this software or data. The NLM and the U.S.
18 *  Government disclaim all warranties, express or implied, including
19 *  warranties of performance, merchantability or fitness for any particular
20 *  purpose.
21 *
22 *  Please cite the author in any work or product based on this material.
23 *
24 * ===========================================================================
25 *
26 * Author: Aleksey Grichenko, Eugene Vasilchenko
27 *
28 * File Description:
29 *   Data loader base class for object manager
30 *
31 */
32 
33 
34 #include <ncbi_pch.hpp>
35 #include <objmgr/data_loader.hpp>
36 #include <objmgr/objmgr_exception.hpp>
37 #include <objects/seq/seq_id_handle.hpp>
38 #include <objmgr/annot_name.hpp>
39 #include <objmgr/annot_type_selector.hpp>
40 #include <objmgr/impl/tse_info.hpp>
41 #include <objmgr/impl/bioseq_info.hpp>
42 #include <objmgr/impl/tse_chunk_info.hpp>
43 #include <objmgr/objmgr_exception.hpp>
44 #include <objmgr/scope.hpp>
45 #include <objmgr/bioseq_handle.hpp>
46 #include <objects/seq/Seq_annot.hpp>
47 
48 
49 BEGIN_NCBI_SCOPE
BEGIN_SCOPE(objects)50 BEGIN_SCOPE(objects)
51 
52 
53 void CDataLoader::RegisterInObjectManager(
54     CObjectManager&            om,
55     CLoaderMaker_Base&         loader_maker,
56     CObjectManager::EIsDefault is_default,
57     CObjectManager::TPriority  priority)
58 {
59     om.RegisterDataLoader(loader_maker, is_default, priority);
60 }
61 
62 
CDataLoader(void)63 CDataLoader::CDataLoader(void)
64 {
65     m_Name = NStr::PtrToString(this);
66     return;
67 }
68 
69 
CDataLoader(const string & loader_name)70 CDataLoader::CDataLoader(const string& loader_name)
71     : m_Name(loader_name)
72 {
73     if (loader_name.empty())
74     {
75         m_Name = NStr::PtrToString(this);
76     }
77 }
78 
79 
~CDataLoader(void)80 CDataLoader::~CDataLoader(void)
81 {
82     return;
83 }
84 
85 
SetTargetDataSource(CDataSource & data_source)86 void CDataLoader::SetTargetDataSource(CDataSource& data_source)
87 {
88     m_DataSource = &data_source;
89 }
90 
91 
GetDataSource(void) const92 CDataSource* CDataLoader::GetDataSource(void) const
93 {
94     return m_DataSource;
95 }
96 
97 
SetName(const string & loader_name)98 void CDataLoader::SetName(const string& loader_name)
99 {
100     m_Name = loader_name;
101 }
102 
103 
GetName(void) const104 string CDataLoader::GetName(void) const
105 {
106     return m_Name;
107 }
108 
109 
DropTSE(CRef<CTSE_Info>)110 void CDataLoader::DropTSE(CRef<CTSE_Info> /*tse_info*/)
111 {
112 }
113 
114 
GC(void)115 void CDataLoader::GC(void)
116 {
117 }
118 
119 
120 CDataLoader::TTSE_LockSet
GetRecords(const CSeq_id_Handle &,EChoice)121 CDataLoader::GetRecords(const CSeq_id_Handle& /*idh*/,
122                         EChoice /*choice*/)
123 {
124     NCBI_THROW(CLoaderException, eNotImplemented,
125                "CDataLoader::GetRecords() is not implemented in subclass");
126 }
127 
128 
129 CDataLoader::TTSE_LockSet
GetRecordsNoBlobState(const CSeq_id_Handle & idh,EChoice choice)130 CDataLoader::GetRecordsNoBlobState(const CSeq_id_Handle& idh,
131                                    EChoice choice)
132 {
133     try {
134         return GetRecords(idh, choice);
135     }
136     catch ( CBlobStateException& /* ignored */ ) {
137         return TTSE_LockSet();
138     }
139 }
140 
141 
142 CDataLoader::TTSE_LockSet
GetDetailedRecords(const CSeq_id_Handle & idh,const SRequestDetails & details)143 CDataLoader::GetDetailedRecords(const CSeq_id_Handle& idh,
144                                 const SRequestDetails& details)
145 {
146     return GetRecords(idh, DetailsToChoice(details));
147 }
148 
149 
150 CDataLoader::TTSE_LockSet
GetExternalRecords(const CBioseq_Info & bioseq)151 CDataLoader::GetExternalRecords(const CBioseq_Info& bioseq)
152 {
153     TTSE_LockSet ret;
154     ITERATE ( CBioseq_Info::TId, it, bioseq.GetId() ) {
155         if ( GetBlobId(*it) ) {
156             // correct id is found
157             TTSE_LockSet ret2 = GetRecords(*it, eExtAnnot);
158             ret.swap(ret2);
159             break;
160         }
161     }
162     return ret;
163 }
164 
165 
166 CDataLoader::TTSE_LockSet
GetOrphanAnnotRecords(const CSeq_id_Handle & idh,const SAnnotSelector *)167 CDataLoader::GetOrphanAnnotRecords(const CSeq_id_Handle& idh,
168                                    const SAnnotSelector* /*sel*/)
169 {
170     return GetRecords(idh, eOrphanAnnot);
171 }
172 
173 
174 CDataLoader::TTSE_LockSet
GetExternalAnnotRecords(const CSeq_id_Handle & idh,const SAnnotSelector *)175 CDataLoader::GetExternalAnnotRecords(const CSeq_id_Handle& idh,
176                                      const SAnnotSelector* /*sel*/)
177 {
178     return GetRecords(idh, eExtAnnot);
179 }
180 
181 
182 CDataLoader::TTSE_LockSet
GetExternalAnnotRecords(const CBioseq_Info & bioseq,const SAnnotSelector * sel)183 CDataLoader::GetExternalAnnotRecords(const CBioseq_Info& bioseq,
184                                      const SAnnotSelector* sel)
185 {
186     TTSE_LockSet ret;
187     ITERATE ( CBioseq_Info::TId, it, bioseq.GetId() ) {
188         if ( !CanGetBlobById() || GetBlobId(*it) ) {
189             // correct id is found
190             TTSE_LockSet ret2 = GetExternalAnnotRecords(*it, sel);
191             if ( !ret2.empty() ) {
192                 ret.swap(ret2);
193                 break;
194             }
195         }
196     }
197     return ret;
198 }
199 
200 
IsRequestedAnyNA(const SAnnotSelector * sel)201 bool CDataLoader::IsRequestedAnyNA(const SAnnotSelector* sel)
202 {
203     return sel && sel->IsIncludedAnyNamedAnnotAccession();
204 }
205 
206 
IsRequestedNA(const string & na,const SAnnotSelector * sel)207 bool CDataLoader::IsRequestedNA(const string& na,
208                                 const SAnnotSelector* sel)
209 {
210     return sel && sel->IsIncludedNamedAnnotAccession(na);
211 }
212 
213 
IsProcessedNA(const string & na,const TProcessedNAs * processed_nas)214 bool CDataLoader::IsProcessedNA(const string& na,
215                                 const TProcessedNAs* processed_nas)
216 {
217     return processed_nas && processed_nas->find(na) == processed_nas->end();
218 }
219 
220 
SetProcessedNA(const string & na,TProcessedNAs * processed_nas)221 void CDataLoader::SetProcessedNA(const string& na,
222                                  TProcessedNAs* processed_nas)
223 {
224     if ( processed_nas ) {
225         processed_nas->insert(na);
226     }
227 }
228 
229 
230 CDataLoader::TTSE_LockSet
GetOrphanAnnotRecordsNA(const CSeq_id_Handle & idh,const SAnnotSelector * sel,TProcessedNAs *)231 CDataLoader::GetOrphanAnnotRecordsNA(const CSeq_id_Handle& idh,
232                                      const SAnnotSelector* sel,
233                                      TProcessedNAs* /*processed_nas*/)
234 {
235     // as a backup call old method that cannot report processed NAs
236     return GetOrphanAnnotRecords(idh, sel);
237 }
238 
239 
240 CDataLoader::TTSE_LockSet
GetOrphanAnnotRecordsNA(const TSeq_idSet & ids,const SAnnotSelector * sel,TProcessedNAs * processed_nas)241 CDataLoader::GetOrphanAnnotRecordsNA(const TSeq_idSet& ids,
242                                      const SAnnotSelector* sel,
243                                      TProcessedNAs* processed_nas)
244 {
245     CDataLoader::TTSE_LockSet tse_set;
246     ITERATE(TSeq_idSet, id_it, ids) {
247         CDataLoader::TTSE_LockSet tse_set2 =
248             GetOrphanAnnotRecordsNA(*id_it, sel, processed_nas);
249         if (!tse_set2.empty()) {
250             if (tse_set.empty()) {
251                 tse_set.swap(tse_set2);
252             }
253             else {
254                 tse_set.insert(tse_set2.begin(), tse_set2.end());
255             }
256         }
257     }
258     return tse_set;
259 }
260 
261 
262 CDataLoader::TTSE_LockSet
GetExternalAnnotRecordsNA(const CSeq_id_Handle & idh,const SAnnotSelector * sel,TProcessedNAs *)263 CDataLoader::GetExternalAnnotRecordsNA(const CSeq_id_Handle& idh,
264                                        const SAnnotSelector* sel,
265                                        TProcessedNAs* /*processed_nas*/)
266 {
267     // as a backup call old method that cannot report processed NAs
268     return GetExternalAnnotRecords(idh, sel);
269 }
270 
271 
272 CDataLoader::TTSE_LockSet
GetExternalAnnotRecordsNA(const CBioseq_Info & bioseq,const SAnnotSelector * sel,TProcessedNAs *)273 CDataLoader::GetExternalAnnotRecordsNA(const CBioseq_Info& bioseq,
274                                        const SAnnotSelector* sel,
275                                        TProcessedNAs* /*processed_nas*/)
276 {
277     // as a backup call old method that cannot report processed NAs
278     return GetExternalAnnotRecords(bioseq, sel);
279 }
280 
281 
CanGetBlobById(void) const282 bool CDataLoader::CanGetBlobById(void) const
283 {
284     return false;
285 }
286 
287 
GetBlobById(const TBlobId &)288 CDataLoader::TTSE_Lock CDataLoader::GetBlobById(const TBlobId& /*blob_id*/)
289 {
290     NCBI_THROW(CLoaderException, eNotImplemented,
291                "CDataLoader::GetBlobById() is not implemented in subclass");
292 }
293 
GetBlobIdFromString(const string &) const294 CDataLoader::TBlobId CDataLoader::GetBlobIdFromString(const string& /*str*/) const
295 {
296     NCBI_THROW(CLoaderException, eNotImplemented,
297                "CDataLoader::GetBlobIdFromString(str) is not implemented in subclass");
298 }
299 
300 
GetIds(const CSeq_id_Handle & idh,TIds & ids)301 void CDataLoader::GetIds(const CSeq_id_Handle& idh, TIds& ids)
302 {
303     TTSE_LockSet locks = GetRecordsNoBlobState(idh, eBioseqCore);
304     ITERATE(TTSE_LockSet, it, locks) {
305         CConstRef<CBioseq_Info> bs_info = (*it)->FindMatchingBioseq(idh);
306         if ( bs_info ) {
307             ids = bs_info->GetId();
308             break;
309         }
310     }
311 }
312 
313 
SequenceExists(const CSeq_id_Handle & idh)314 bool CDataLoader::SequenceExists(const CSeq_id_Handle& idh)
315 {
316     // check if sequence exists
317     TIds ids;
318     GetIds(idh, ids);
319     return !ids.empty();
320 }
321 
322 
// Return the accession.version id of the sequence.
// Default implementation based on GetIds().
// Throws CLoaderException:
//   eNotFound - GetIds() reported no ids;
//   eNoData   - CScope::x_GetAccVer() found no accession among the ids.
CSeq_id_Handle CDataLoader::GetAccVer(const CSeq_id_Handle& idh)
{
    TIds seq_ids;
    GetIds(idh, seq_ids);
    if ( seq_ids.empty() ) {
        NCBI_THROW(CLoaderException, eNotFound,
                   "CDataLoader::GetAccVer() sequence not found");
    }
    CSeq_id_Handle acc_ver = CScope::x_GetAccVer(seq_ids);
    if ( acc_ver ) {
        return acc_ver;
    }
    NCBI_THROW(CLoaderException, eNoData,
               "CDataLoader::GetAccVer() sequence doesn't have accession");
}
339 
340 
341 CDataLoader::SAccVerFound
GetAccVerFound(const CSeq_id_Handle & idh)342 CDataLoader::GetAccVerFound(const CSeq_id_Handle& idh)
343 {
344     // default implementation based on GetAccVer() and GetIds()
345     SAccVerFound ret;
346     try {
347         ret.acc_ver = GetAccVer(idh);
348         ret.sequence_found = ret.acc_ver || SequenceExists(idh);
349     }
350     catch ( CLoaderException& exc ) {
351         if ( exc.GetErrCode() == exc.eNotFound ) {
352             // no sequence
353         }
354         else if ( exc.GetErrCode() == exc.eNoData ) {
355             // sequence is known, but there is no accession
356             ret.sequence_found = true;
357         }
358         else {
359             // problem
360             throw;
361         }
362     }
363     return ret;
364 }
365 
366 
GetGi(const CSeq_id_Handle & idh)367 TGi CDataLoader::GetGi(const CSeq_id_Handle& idh)
368 {
369     // default implementation based on GetIds();
370     TIds ids;
371     GetIds(idh, ids);
372     if ( ids.empty() ) {
373         NCBI_THROW(CLoaderException, eNotFound,
374                    "CDataLoader::GetGi() sequence not found");
375     }
376     TGi gi = CScope::x_GetGi(ids);
377     if ( gi == ZERO_GI ) {
378         NCBI_THROW(CLoaderException, eNoData,
379                    "CDataLoader::GetGi() sequence doesn't have GI");
380     }
381     return gi;
382 }
383 
384 
GetGiFound(const CSeq_id_Handle & idh)385 CDataLoader::SGiFound CDataLoader::GetGiFound(const CSeq_id_Handle& idh)
386 {
387     // default implementation based on GetGi() and GetIds()
388     SGiFound ret;
389     try {
390         ret.gi = GetGi(idh);
391         ret.sequence_found = ret.gi != ZERO_GI || SequenceExists(idh);
392     }
393     catch ( CLoaderException& exc ) {
394         if ( exc.GetErrCode() == exc.eNotFound ) {
395             // no sequence
396         }
397         else if ( exc.GetErrCode() == exc.eNoData ) {
398             // sequence is known, but there is no GI
399             ret.sequence_found = true;
400         }
401         else {
402             // problem
403             throw;
404         }
405     }
406     return ret;
407 }
408 
409 
GetLabel(const CSeq_id_Handle & idh)410 string CDataLoader::GetLabel(const CSeq_id_Handle& idh)
411 {
412     // default implementation based on GetIds();
413     TIds ids;
414     GetIds(idh, ids);
415     if ( ids.empty() ) {
416         return string();
417     }
418     return objects::GetLabel(ids);
419 }
420 
421 
GetTaxId(const CSeq_id_Handle & idh)422 TTaxId CDataLoader::GetTaxId(const CSeq_id_Handle& idh)
423 {
424     // default implementation based on GetRecordsNoBlobState();
425     TTSE_LockSet locks = GetRecordsNoBlobState(idh, eBioseqCore);
426     ITERATE(TTSE_LockSet, it, locks) {
427         CConstRef<CBioseq_Info> bs_info = (*it)->FindMatchingBioseq(idh);
428         if ( bs_info ) {
429             return bs_info->GetTaxId();
430         }
431     }
432     return INVALID_TAX_ID;
433 }
434 
435 
GetSequenceLength(const CSeq_id_Handle & idh)436 TSeqPos CDataLoader::GetSequenceLength(const CSeq_id_Handle& idh)
437 {
438     // default implementation based on GetRecordsNoBlobState()
439     TTSE_LockSet locks = GetRecordsNoBlobState(idh, eBioseqCore);
440     ITERATE(TTSE_LockSet, it, locks) {
441         CConstRef<CBioseq_Info> bs_info = (*it)->FindMatchingBioseq(idh);
442         if ( bs_info ) {
443             return bs_info->GetBioseqLength();
444         }
445     }
446     return kInvalidSeqPos;
447 }
448 
449 
GetSequenceType(const CSeq_id_Handle & idh)450 CSeq_inst::TMol CDataLoader::GetSequenceType(const CSeq_id_Handle& idh)
451 {
452     // default implementation based on GetRecordsNoBlobState()
453     TTSE_LockSet locks = GetRecordsNoBlobState(idh, eBioseqCore);
454     ITERATE(TTSE_LockSet, it, locks) {
455         CConstRef<CBioseq_Info> bs_info = (*it)->FindMatchingBioseq(idh);
456         if ( bs_info ) {
457             CSeq_inst::TMol type = bs_info->GetInst_Mol();
458             if ( type == CSeq_inst::eMol_not_set ) {
459                 NCBI_THROW(CLoaderException, eNoData,
460                            "CDataLoader::GetSequenceType() type not set");
461             }
462             return type;
463         }
464     }
465     NCBI_THROW(CLoaderException, eNotFound,
466                "CDataLoader::GetSequenceType() sequence not found");
467 }
468 
469 
470 CDataLoader::STypeFound
GetSequenceTypeFound(const CSeq_id_Handle & idh)471 CDataLoader::GetSequenceTypeFound(const CSeq_id_Handle& idh)
472 {
473     // default implementation based on GetSequenceType() and GetIds()
474     STypeFound ret;
475     try {
476         ret.type = GetSequenceType(idh);
477         ret.sequence_found =
478             ret.type != CSeq_inst::eMol_not_set || SequenceExists(idh);
479     }
480     catch ( CLoaderException& exc ) {
481         if ( exc.GetErrCode() == exc.eNotFound ) {
482             // no sequence
483         }
484         else if ( exc.GetErrCode() == exc.eNoData ) {
485             // sequence is known, but there is no type
486             ret.sequence_found = true;
487         }
488         else {
489             // problem
490             throw;
491         }
492     }
493     return ret;
494 }
495 
496 
GetSequenceState(const CSeq_id_Handle & idh)497 int CDataLoader::GetSequenceState(const CSeq_id_Handle& idh)
498 {
499     try {
500         TTSE_LockSet locks = GetRecords(idh, eBioseqCore);
501         ITERATE(TTSE_LockSet, it, locks) {
502             CConstRef<CBioseq_Info> bs_info = (*it)->FindMatchingBioseq(idh);
503             if ( bs_info ) {
504                 return (*it)->GetBlobState();
505             }
506         }
507         return CBioseq_Handle::fState_not_found|CBioseq_Handle::fState_no_data;
508     }
509     catch ( CBlobStateException& exc ) {
510         return exc.GetBlobState();
511     }
512 }
513 
514 
GetSequenceHash(const CSeq_id_Handle & idh)515 int CDataLoader::GetSequenceHash(const CSeq_id_Handle& idh)
516 {
517     if ( SequenceExists(idh) ) {
518         NCBI_THROW(CLoaderException, eNoData,
519                    "CDataLoader::GetSequenceHash() sequence hash not set");
520     }
521     NCBI_THROW(CLoaderException, eNotFound,
522                "CDataLoader::GetSequenceHash() sequence not found");
523 }
524 
525 
526 CDataLoader::SHashFound
GetSequenceHashFound(const CSeq_id_Handle & idh)527 CDataLoader::GetSequenceHashFound(const CSeq_id_Handle& idh)
528 {
529     // default implementation based on GetSequenceHash() and GetIds()
530     SHashFound ret;
531     try {
532         ret.hash = GetSequenceHash(idh);
533         if ( !ret.hash ) {
534             // hash = 0, we don't know what causes it:
535             // absence of sequence, unknown hash, or the hash happens to be 0.
536             ret.sequence_found = SequenceExists(idh);
537         }
538         else {
539             ret.sequence_found = true;
540             ret.hash_known = true;
541         }
542     }
543     catch ( CLoaderException& exc ) {
544         if ( exc.GetErrCode() == exc.eNotFound ) {
545             // no sequence found
546         }
547         else if ( exc.GetErrCode() == exc.eNoData ) {
548             // sequence exists
549             ret.sequence_found = true;
550         }
551         else {
552             // problem
553             throw;
554         }
555     }
556     return ret;
557 }
558 
559 
GetAccVers(const TIds & ids,TLoaded & loaded,TIds & ret)560 void CDataLoader::GetAccVers(const TIds& ids, TLoaded& loaded, TIds& ret)
561 {
562     size_t count = ids.size();
563     _ASSERT(ids.size() == loaded.size());
564     _ASSERT(ids.size() == ret.size());
565     TIds seq_ids;
566     for ( size_t i = 0; i < count; ++i ) {
567         if ( loaded[i] ) {
568             continue;
569         }
570         SAccVerFound data = GetAccVerFound(ids[i]);
571         if ( data.sequence_found ) {
572             ret[i] = data.acc_ver;
573             loaded[i] = true;
574         }
575     }
576 }
577 
578 
GetGis(const TIds & ids,TLoaded & loaded,TGis & ret)579 void CDataLoader::GetGis(const TIds& ids, TLoaded& loaded, TGis& ret)
580 {
581     size_t count = ids.size();
582     _ASSERT(ids.size() == loaded.size());
583     _ASSERT(ids.size() == ret.size());
584     TIds seq_ids;
585     for ( size_t i = 0; i < count; ++i ) {
586         if ( loaded[i] ) {
587             continue;
588         }
589         SGiFound data = GetGiFound(ids[i]);
590         if ( data.sequence_found ) {
591             ret[i] = data.gi;
592             loaded[i] = true;
593         }
594     }
595 }
596 
597 
GetLabels(const TIds & ids,TLoaded & loaded,TLabels & ret)598 void CDataLoader::GetLabels(const TIds& ids, TLoaded& loaded, TLabels& ret)
599 {
600     size_t count = ids.size();
601     _ASSERT(ids.size() == loaded.size());
602     _ASSERT(ids.size() == ret.size());
603     for ( size_t i = 0; i < count; ++i ) {
604         if ( loaded[i] ) {
605             continue;
606         }
607         string label = GetLabel(ids[i]);
608         if ( !label.empty() ) {
609             ret[i] = label;
610             loaded[i] = true;
611         }
612     }
613 }
614 
615 
GetTaxIds(const TIds & ids,TLoaded & loaded,TTaxIds & ret)616 void CDataLoader::GetTaxIds(const TIds& ids, TLoaded& loaded, TTaxIds& ret)
617 {
618     size_t count = ids.size();
619     _ASSERT(ids.size() == loaded.size());
620     _ASSERT(ids.size() == ret.size());
621     for ( size_t i = 0; i < count; ++i ) {
622         if ( loaded[i] ) {
623             continue;
624         }
625 
626         TTaxId taxid = GetTaxId(ids[i]);
627         if ( taxid != INVALID_TAX_ID ) {
628             ret[i] = taxid;
629             loaded[i] = true;
630         }
631     }
632 }
633 
634 
GetSequenceLengths(const TIds & ids,TLoaded & loaded,TSequenceLengths & ret)635 void CDataLoader::GetSequenceLengths(const TIds& ids, TLoaded& loaded,
636                                      TSequenceLengths& ret)
637 {
638     size_t count = ids.size();
639     _ASSERT(ids.size() == loaded.size());
640     _ASSERT(ids.size() == ret.size());
641     for ( size_t i = 0; i < count; ++i ) {
642         if ( loaded[i] ) {
643             continue;
644         }
645 
646         TSeqPos len = GetSequenceLength(ids[i]);
647         if ( len != kInvalidSeqPos ) {
648             ret[i] = len;
649             loaded[i] = true;
650         }
651     }
652 }
653 
654 
GetSequenceTypes(const TIds & ids,TLoaded & loaded,TSequenceTypes & ret)655 void CDataLoader::GetSequenceTypes(const TIds& ids, TLoaded& loaded,
656                                    TSequenceTypes& ret)
657 {
658     size_t count = ids.size();
659     _ASSERT(ids.size() == loaded.size());
660     _ASSERT(ids.size() == ret.size());
661     for ( size_t i = 0; i < count; ++i ) {
662         if ( loaded[i] ) {
663             continue;
664         }
665 
666         STypeFound data = GetSequenceTypeFound(ids[i]);
667         if ( data.sequence_found ) {
668             ret[i] = data.type;
669             loaded[i] = true;
670         }
671     }
672 }
673 
674 
GetSequenceStates(const TIds & ids,TLoaded & loaded,TSequenceStates & ret)675 void CDataLoader::GetSequenceStates(const TIds& ids, TLoaded& loaded,
676                                    TSequenceStates& ret)
677 {
678     const int kNotFound = (CBioseq_Handle::fState_not_found |
679                            CBioseq_Handle::fState_no_data);
680 
681     size_t count = ids.size();
682     _ASSERT(ids.size() == loaded.size());
683     _ASSERT(ids.size() == ret.size());
684     for ( size_t i = 0; i < count; ++i ) {
685         if ( loaded[i] ) {
686             continue;
687         }
688 
689         int state = GetSequenceState(ids[i]);
690         if ( state != kNotFound ) {
691             ret[i] = state;
692             loaded[i] = true;
693         }
694     }
695 }
696 
697 
GetSequenceHashes(const TIds & ids,TLoaded & loaded,TSequenceHashes & ret,THashKnown & known)698 void CDataLoader::GetSequenceHashes(const TIds& ids, TLoaded& loaded,
699                                     TSequenceHashes& ret, THashKnown& known)
700 {
701     size_t count = ids.size();
702     _ASSERT(ids.size() == loaded.size());
703     _ASSERT(ids.size() == ret.size());
704     for ( size_t i = 0; i < count; ++i ) {
705         if ( loaded[i] ) {
706             continue;
707         }
708 
709         SHashFound data = GetSequenceHashFound(ids[i]);
710         if ( data.sequence_found ) {
711             ret[i] = data.hash;
712             loaded[i] = true;
713             known[i] = data.hash_known;
714         }
715     }
716 }
717 
718 
GetBlobs(TTSE_LockSets & tse_sets)719 void CDataLoader::GetBlobs(TTSE_LockSets& tse_sets)
720 {
721     NON_CONST_ITERATE(TTSE_LockSets, tse_set, tse_sets) {
722         tse_set->second = GetRecords(tse_set->first, eBlob);
723     }
724 }
725 
726 
727 CDataLoader::EChoice
DetailsToChoice(const SRequestDetails::TAnnotSet & annots) const728 CDataLoader::DetailsToChoice(const SRequestDetails::TAnnotSet& annots) const
729 {
730     EChoice ret = eCore;
731     ITERATE ( SRequestDetails::TAnnotSet, i, annots ) {
732         ITERATE ( SRequestDetails::TAnnotTypesSet, j, i->second ) {
733             EChoice cur = eCore;
734             switch ( j->GetAnnotType() ) {
735             case CSeq_annot::C_Data::e_Ftable:
736                 cur = eFeatures;
737                 break;
738             case CSeq_annot::C_Data::e_Graph:
739                 cur = eGraph;
740                 break;
741             case CSeq_annot::C_Data::e_Align:
742                 cur = eAlign;
743                 break;
744             case CSeq_annot::C_Data::e_not_set:
745                 return eAnnot;
746             default:
747                 break;
748             }
749             if ( cur != eCore && cur != ret ) {
750                 if ( ret != eCore ) return eAnnot;
751                 ret = cur;
752             }
753         }
754     }
755     return ret;
756 }
757 
758 
759 CDataLoader::EChoice
DetailsToChoice(const SRequestDetails & details) const760 CDataLoader::DetailsToChoice(const SRequestDetails& details) const
761 {
762     EChoice ret = DetailsToChoice(details.m_NeedAnnots);
763     switch ( details.m_AnnotBlobType ) {
764     case SRequestDetails::fAnnotBlobNone:
765         // no annotations
766         ret = eCore;
767         break;
768     case SRequestDetails::fAnnotBlobInternal:
769         // no change
770         break;
771     case SRequestDetails::fAnnotBlobExternal:
772         // shift from internal to external annotations
773         _ASSERT(ret >= eFeatures && ret <= eAnnot);
774         ret = EChoice(ret + eExtFeatures - eFeatures);
775         _ASSERT(ret >= eExtFeatures && ret <= eExtAnnot);
776         break;
777     case SRequestDetails::fAnnotBlobOrphan:
778         // all orphan annots
779         ret = eOrphanAnnot;
780         break;
781     default:
782         // all other cases -> eAll
783         ret = eAll;
784         break;
785     }
786     if ( !details.m_NeedSeqMap.Empty() || !details.m_NeedSeqData.Empty() ) {
787         // include sequence
788         if ( ret == eCore ) {
789             ret = eSequence;
790         }
791         else if ( ret >= eFeatures && ret <= eAnnot ) {
792             // only internal annot + sequence -> whole blob
793             ret = eBlob;
794         }
795         else {
796             // all blobs
797             ret = eAll;
798         }
799     }
800     return ret;
801 }
802 
803 
ChoiceToDetails(EChoice choice) const804 SRequestDetails CDataLoader::ChoiceToDetails(EChoice choice) const
805 {
806     SRequestDetails details;
807     CSeq_annot::C_Data::E_Choice type = CSeq_annot::C_Data::e_not_set;
808     bool sequence = false;
809     switch ( choice ) {
810     case eAll:
811         sequence = true;
812         // from all blobs
813         details.m_AnnotBlobType = SRequestDetails::fAnnotBlobAll;
814         break;
815     case eBlob:
816     case eBioseq:
817     case eBioseqCore:
818         sequence = true;
819         // internal only
820         details.m_AnnotBlobType = SRequestDetails::fAnnotBlobInternal;
821         break;
822     case eSequence:
823         sequence = true;
824         break;
825     case eAnnot:
826         // internal only
827         details.m_AnnotBlobType = SRequestDetails::fAnnotBlobInternal;
828         break;
829     case eGraph:
830         type = CSeq_annot::C_Data::e_Graph;
831         // internal only
832         details.m_AnnotBlobType = SRequestDetails::fAnnotBlobInternal;
833         break;
834     case eFeatures:
835         type = CSeq_annot::C_Data::e_Ftable;
836         // internal only
837         details.m_AnnotBlobType = SRequestDetails::fAnnotBlobInternal;
838         break;
839     case eAlign:
840         type = CSeq_annot::C_Data::e_Align;
841         // internal only
842         details.m_AnnotBlobType = SRequestDetails::fAnnotBlobInternal;
843         break;
844     case eExtAnnot:
845         // external only
846         details.m_AnnotBlobType = SRequestDetails::fAnnotBlobExternal;
847         break;
848     case eExtGraph:
849         type = CSeq_annot::C_Data::e_Graph;
850         // external only
851         details.m_AnnotBlobType = SRequestDetails::fAnnotBlobExternal;
852         break;
853     case eExtFeatures:
854         type = CSeq_annot::C_Data::e_Ftable;
855         // external only
856         details.m_AnnotBlobType = SRequestDetails::fAnnotBlobExternal;
857         break;
858     case eExtAlign:
859         type = CSeq_annot::C_Data::e_Align;
860         // external only
861         details.m_AnnotBlobType = SRequestDetails::fAnnotBlobExternal;
862         break;
863     case eOrphanAnnot:
864         // orphan annotations only
865         details.m_AnnotBlobType = SRequestDetails::fAnnotBlobOrphan;
866         break;
867     default:
868         break;
869     }
870     if ( sequence ) {
871         details.m_NeedSeqMap = SRequestDetails::TRange::GetWhole();
872         details.m_NeedSeqData = SRequestDetails::TRange::GetWhole();
873     }
874     if ( details.m_AnnotBlobType != SRequestDetails::fAnnotBlobNone ) {
875         details.m_NeedAnnots[CAnnotName()].insert(SAnnotTypeSelector(type));
876     }
877     return details;
878 }
879 
880 
GetChunk(TChunk)881 void CDataLoader::GetChunk(TChunk /*chunk_info*/)
882 {
883     NCBI_THROW(CLoaderException, eNotImplemented,
884                "CDataLoader::GetChunk() is not implemented in subclass");
885 }
886 
887 
GetChunks(const TChunkSet & chunks)888 void CDataLoader::GetChunks(const TChunkSet& chunks)
889 {
890     ITERATE ( TChunkSet, it, chunks ) {
891         GetChunk(*it);
892     }
893 }
894 
895 
896 CDataLoader::TTSE_Lock
ResolveConflict(const CSeq_id_Handle &,const TTSE_LockSet &)897 CDataLoader::ResolveConflict(const CSeq_id_Handle& /*id*/,
898                              const TTSE_LockSet& /*tse_set*/)
899 {
900     return TTSE_Lock();
901 }
902 
903 
GetBlobId(const CSeq_id_Handle &)904 CDataLoader::TBlobId CDataLoader::GetBlobId(const CSeq_id_Handle& /*sih*/)
905 {
906     return TBlobId();
907 }
908 
909 
GetBlobVersion(const TBlobId &)910 CDataLoader::TBlobVersion CDataLoader::GetBlobVersion(const TBlobId& /*id*/)
911 {
912     return 0;
913 }
914 
GetEditSaver() const915 CDataLoader::TEditSaver CDataLoader::GetEditSaver() const
916 {
917     return TEditSaver();
918 }
919 
920 
GetDefaultPriority(void) const921 CObjectManager::TPriority CDataLoader::GetDefaultPriority(void) const
922 {
923     return CObjectManager::kPriority_Loader;
924 }
925 
926 
EstimateLoadBytes(const CTSE_Chunk_Info &) const927 Uint4 CDataLoader::EstimateLoadBytes(const CTSE_Chunk_Info& /*chunk*/) const
928 {
929     return 32000; // assume 32KB chunk size
930 }
931 
932 
EstimateLoadSeconds(const CTSE_Chunk_Info &,Uint4 bytes) const933 double CDataLoader::EstimateLoadSeconds(const CTSE_Chunk_Info& /*chunk*/, Uint4 bytes) const
934 {
935     return bytes*1e-7+0.001; // assume 10MB/s transfer speed and 1ms overhead
936 }
937 
938 
GetDefaultBlobCacheSizeLimit(void) const939 unsigned CDataLoader::GetDefaultBlobCacheSizeLimit(void) const
940 {
941     return kMax_UInt;
942 }
943 
944 
945 /////////////////////////////////////////////////////////////////////////////
946 // CBlobId
947 
~CBlobId(void)948 CBlobId::~CBlobId(void)
949 {
950 }
951 
LessByTypeId(const CBlobId & id2) const952 bool CBlobId::LessByTypeId(const CBlobId& id2) const
953 {
954     return typeid(*this).before(typeid(id2));
955 }
956 
operator ==(const CBlobId & id) const957 bool CBlobId::operator==(const CBlobId& id) const
958 {
959     return !(*this < id || id < *this);
960 }
961 
962 
963 END_SCOPE(objects)
964 END_NCBI_SCOPE
965