1 /* $Id: data_loader.cpp 610968 2020-06-26 12:55:17Z grichenk $
2 * ===========================================================================
3 *
4 * PUBLIC DOMAIN NOTICE
5 * National Center for Biotechnology Information
6 *
7 * This software/database is a "United States Government Work" under the
8 * terms of the United States Copyright Act. It was written as part of
9 * the author's official duties as a United States Government employee and
10 * thus cannot be copyrighted. This software/database is freely available
11 * to the public for use. The National Library of Medicine and the U.S.
12 * Government have not placed any restriction on its use or reproduction.
13 *
14 * Although all reasonable efforts have been taken to ensure the accuracy
15 * and reliability of the software and data, the NLM and the U.S.
16 * Government do not and cannot warrant the performance or results that
17 * may be obtained by using this software or data. The NLM and the U.S.
18 * Government disclaim all warranties, express or implied, including
19 * warranties of performance, merchantability or fitness for any particular
20 * purpose.
21 *
22 * Please cite the author in any work or product based on this material.
23 *
24 * ===========================================================================
25 *
26 * Author: Aleksey Grichenko, Eugene Vasilchenko
27 *
28 * File Description:
29 * Data loader base class for object manager
30 *
31 */
32
33
34 #include <ncbi_pch.hpp>
35 #include <objmgr/data_loader.hpp>
36 #include <objmgr/objmgr_exception.hpp>
37 #include <objects/seq/seq_id_handle.hpp>
38 #include <objmgr/annot_name.hpp>
39 #include <objmgr/annot_type_selector.hpp>
40 #include <objmgr/impl/tse_info.hpp>
41 #include <objmgr/impl/bioseq_info.hpp>
42 #include <objmgr/impl/tse_chunk_info.hpp>
43 #include <objmgr/objmgr_exception.hpp>
44 #include <objmgr/scope.hpp>
45 #include <objmgr/bioseq_handle.hpp>
46 #include <objects/seq/Seq_annot.hpp>
47
48
49 BEGIN_NCBI_SCOPE
BEGIN_SCOPE(objects)50 BEGIN_SCOPE(objects)
51
52
53 void CDataLoader::RegisterInObjectManager(
54 CObjectManager& om,
55 CLoaderMaker_Base& loader_maker,
56 CObjectManager::EIsDefault is_default,
57 CObjectManager::TPriority priority)
58 {
59 om.RegisterDataLoader(loader_maker, is_default, priority);
60 }
61
62
CDataLoader(void)63 CDataLoader::CDataLoader(void)
64 {
65 m_Name = NStr::PtrToString(this);
66 return;
67 }
68
69
CDataLoader(const string & loader_name)70 CDataLoader::CDataLoader(const string& loader_name)
71 : m_Name(loader_name)
72 {
73 if (loader_name.empty())
74 {
75 m_Name = NStr::PtrToString(this);
76 }
77 }
78
79
~CDataLoader(void)80 CDataLoader::~CDataLoader(void)
81 {
82 return;
83 }
84
85
SetTargetDataSource(CDataSource & data_source)86 void CDataLoader::SetTargetDataSource(CDataSource& data_source)
87 {
88 m_DataSource = &data_source;
89 }
90
91
GetDataSource(void) const92 CDataSource* CDataLoader::GetDataSource(void) const
93 {
94 return m_DataSource;
95 }
96
97
SetName(const string & loader_name)98 void CDataLoader::SetName(const string& loader_name)
99 {
100 m_Name = loader_name;
101 }
102
103
GetName(void) const104 string CDataLoader::GetName(void) const
105 {
106 return m_Name;
107 }
108
109
DropTSE(CRef<CTSE_Info>)110 void CDataLoader::DropTSE(CRef<CTSE_Info> /*tse_info*/)
111 {
112 }
113
114
GC(void)115 void CDataLoader::GC(void)
116 {
117 }
118
119
120 CDataLoader::TTSE_LockSet
GetRecords(const CSeq_id_Handle &,EChoice)121 CDataLoader::GetRecords(const CSeq_id_Handle& /*idh*/,
122 EChoice /*choice*/)
123 {
124 NCBI_THROW(CLoaderException, eNotImplemented,
125 "CDataLoader::GetRecords() is not implemented in subclass");
126 }
127
128
129 CDataLoader::TTSE_LockSet
GetRecordsNoBlobState(const CSeq_id_Handle & idh,EChoice choice)130 CDataLoader::GetRecordsNoBlobState(const CSeq_id_Handle& idh,
131 EChoice choice)
132 {
133 try {
134 return GetRecords(idh, choice);
135 }
136 catch ( CBlobStateException& /* ignored */ ) {
137 return TTSE_LockSet();
138 }
139 }
140
141
142 CDataLoader::TTSE_LockSet
GetDetailedRecords(const CSeq_id_Handle & idh,const SRequestDetails & details)143 CDataLoader::GetDetailedRecords(const CSeq_id_Handle& idh,
144 const SRequestDetails& details)
145 {
146 return GetRecords(idh, DetailsToChoice(details));
147 }
148
149
150 CDataLoader::TTSE_LockSet
GetExternalRecords(const CBioseq_Info & bioseq)151 CDataLoader::GetExternalRecords(const CBioseq_Info& bioseq)
152 {
153 TTSE_LockSet ret;
154 ITERATE ( CBioseq_Info::TId, it, bioseq.GetId() ) {
155 if ( GetBlobId(*it) ) {
156 // correct id is found
157 TTSE_LockSet ret2 = GetRecords(*it, eExtAnnot);
158 ret.swap(ret2);
159 break;
160 }
161 }
162 return ret;
163 }
164
165
166 CDataLoader::TTSE_LockSet
GetOrphanAnnotRecords(const CSeq_id_Handle & idh,const SAnnotSelector *)167 CDataLoader::GetOrphanAnnotRecords(const CSeq_id_Handle& idh,
168 const SAnnotSelector* /*sel*/)
169 {
170 return GetRecords(idh, eOrphanAnnot);
171 }
172
173
174 CDataLoader::TTSE_LockSet
GetExternalAnnotRecords(const CSeq_id_Handle & idh,const SAnnotSelector *)175 CDataLoader::GetExternalAnnotRecords(const CSeq_id_Handle& idh,
176 const SAnnotSelector* /*sel*/)
177 {
178 return GetRecords(idh, eExtAnnot);
179 }
180
181
182 CDataLoader::TTSE_LockSet
GetExternalAnnotRecords(const CBioseq_Info & bioseq,const SAnnotSelector * sel)183 CDataLoader::GetExternalAnnotRecords(const CBioseq_Info& bioseq,
184 const SAnnotSelector* sel)
185 {
186 TTSE_LockSet ret;
187 ITERATE ( CBioseq_Info::TId, it, bioseq.GetId() ) {
188 if ( !CanGetBlobById() || GetBlobId(*it) ) {
189 // correct id is found
190 TTSE_LockSet ret2 = GetExternalAnnotRecords(*it, sel);
191 if ( !ret2.empty() ) {
192 ret.swap(ret2);
193 break;
194 }
195 }
196 }
197 return ret;
198 }
199
200
IsRequestedAnyNA(const SAnnotSelector * sel)201 bool CDataLoader::IsRequestedAnyNA(const SAnnotSelector* sel)
202 {
203 return sel && sel->IsIncludedAnyNamedAnnotAccession();
204 }
205
206
IsRequestedNA(const string & na,const SAnnotSelector * sel)207 bool CDataLoader::IsRequestedNA(const string& na,
208 const SAnnotSelector* sel)
209 {
210 return sel && sel->IsIncludedNamedAnnotAccession(na);
211 }
212
213
IsProcessedNA(const string & na,const TProcessedNAs * processed_nas)214 bool CDataLoader::IsProcessedNA(const string& na,
215 const TProcessedNAs* processed_nas)
216 {
217 return processed_nas && processed_nas->find(na) == processed_nas->end();
218 }
219
220
SetProcessedNA(const string & na,TProcessedNAs * processed_nas)221 void CDataLoader::SetProcessedNA(const string& na,
222 TProcessedNAs* processed_nas)
223 {
224 if ( processed_nas ) {
225 processed_nas->insert(na);
226 }
227 }
228
229
230 CDataLoader::TTSE_LockSet
GetOrphanAnnotRecordsNA(const CSeq_id_Handle & idh,const SAnnotSelector * sel,TProcessedNAs *)231 CDataLoader::GetOrphanAnnotRecordsNA(const CSeq_id_Handle& idh,
232 const SAnnotSelector* sel,
233 TProcessedNAs* /*processed_nas*/)
234 {
235 // as a backup call old method that cannot report processed NAs
236 return GetOrphanAnnotRecords(idh, sel);
237 }
238
239
240 CDataLoader::TTSE_LockSet
GetOrphanAnnotRecordsNA(const TSeq_idSet & ids,const SAnnotSelector * sel,TProcessedNAs * processed_nas)241 CDataLoader::GetOrphanAnnotRecordsNA(const TSeq_idSet& ids,
242 const SAnnotSelector* sel,
243 TProcessedNAs* processed_nas)
244 {
245 CDataLoader::TTSE_LockSet tse_set;
246 ITERATE(TSeq_idSet, id_it, ids) {
247 CDataLoader::TTSE_LockSet tse_set2 =
248 GetOrphanAnnotRecordsNA(*id_it, sel, processed_nas);
249 if (!tse_set2.empty()) {
250 if (tse_set.empty()) {
251 tse_set.swap(tse_set2);
252 }
253 else {
254 tse_set.insert(tse_set2.begin(), tse_set2.end());
255 }
256 }
257 }
258 return tse_set;
259 }
260
261
262 CDataLoader::TTSE_LockSet
GetExternalAnnotRecordsNA(const CSeq_id_Handle & idh,const SAnnotSelector * sel,TProcessedNAs *)263 CDataLoader::GetExternalAnnotRecordsNA(const CSeq_id_Handle& idh,
264 const SAnnotSelector* sel,
265 TProcessedNAs* /*processed_nas*/)
266 {
267 // as a backup call old method that cannot report processed NAs
268 return GetExternalAnnotRecords(idh, sel);
269 }
270
271
272 CDataLoader::TTSE_LockSet
GetExternalAnnotRecordsNA(const CBioseq_Info & bioseq,const SAnnotSelector * sel,TProcessedNAs *)273 CDataLoader::GetExternalAnnotRecordsNA(const CBioseq_Info& bioseq,
274 const SAnnotSelector* sel,
275 TProcessedNAs* /*processed_nas*/)
276 {
277 // as a backup call old method that cannot report processed NAs
278 return GetExternalAnnotRecords(bioseq, sel);
279 }
280
281
CanGetBlobById(void) const282 bool CDataLoader::CanGetBlobById(void) const
283 {
284 return false;
285 }
286
287
GetBlobById(const TBlobId &)288 CDataLoader::TTSE_Lock CDataLoader::GetBlobById(const TBlobId& /*blob_id*/)
289 {
290 NCBI_THROW(CLoaderException, eNotImplemented,
291 "CDataLoader::GetBlobById() is not implemented in subclass");
292 }
293
GetBlobIdFromString(const string &) const294 CDataLoader::TBlobId CDataLoader::GetBlobIdFromString(const string& /*str*/) const
295 {
296 NCBI_THROW(CLoaderException, eNotImplemented,
297 "CDataLoader::GetBlobIdFromString(str) is not implemented in subclass");
298 }
299
300
GetIds(const CSeq_id_Handle & idh,TIds & ids)301 void CDataLoader::GetIds(const CSeq_id_Handle& idh, TIds& ids)
302 {
303 TTSE_LockSet locks = GetRecordsNoBlobState(idh, eBioseqCore);
304 ITERATE(TTSE_LockSet, it, locks) {
305 CConstRef<CBioseq_Info> bs_info = (*it)->FindMatchingBioseq(idh);
306 if ( bs_info ) {
307 ids = bs_info->GetId();
308 break;
309 }
310 }
311 }
312
313
SequenceExists(const CSeq_id_Handle & idh)314 bool CDataLoader::SequenceExists(const CSeq_id_Handle& idh)
315 {
316 // check if sequence exists
317 TIds ids;
318 GetIds(idh, ids);
319 return !ids.empty();
320 }
321
322
// Default accession.version lookup built on GetIds().
// Throws CLoaderException:
//   eNotFound - GetIds() returned no ids for the sequence;
//   eNoData   - ids exist, but CScope::x_GetAccVer() found no accession.
CSeq_id_Handle CDataLoader::GetAccVer(const CSeq_id_Handle& idh)
{
    TIds seq_ids;
    GetIds(idh, seq_ids);
    if ( seq_ids.empty() ) {
        NCBI_THROW(CLoaderException, eNotFound,
                   "CDataLoader::GetAccVer() sequence not found");
    }
    CSeq_id_Handle acc_ver = CScope::x_GetAccVer(seq_ids);
    if ( acc_ver ) {
        return acc_ver;
    }
    NCBI_THROW(CLoaderException, eNoData,
               "CDataLoader::GetAccVer() sequence doesn't have accession");
}
339
340
341 CDataLoader::SAccVerFound
GetAccVerFound(const CSeq_id_Handle & idh)342 CDataLoader::GetAccVerFound(const CSeq_id_Handle& idh)
343 {
344 // default implementation based on GetAccVer() and GetIds()
345 SAccVerFound ret;
346 try {
347 ret.acc_ver = GetAccVer(idh);
348 ret.sequence_found = ret.acc_ver || SequenceExists(idh);
349 }
350 catch ( CLoaderException& exc ) {
351 if ( exc.GetErrCode() == exc.eNotFound ) {
352 // no sequence
353 }
354 else if ( exc.GetErrCode() == exc.eNoData ) {
355 // sequence is known, but there is no accession
356 ret.sequence_found = true;
357 }
358 else {
359 // problem
360 throw;
361 }
362 }
363 return ret;
364 }
365
366
GetGi(const CSeq_id_Handle & idh)367 TGi CDataLoader::GetGi(const CSeq_id_Handle& idh)
368 {
369 // default implementation based on GetIds();
370 TIds ids;
371 GetIds(idh, ids);
372 if ( ids.empty() ) {
373 NCBI_THROW(CLoaderException, eNotFound,
374 "CDataLoader::GetGi() sequence not found");
375 }
376 TGi gi = CScope::x_GetGi(ids);
377 if ( gi == ZERO_GI ) {
378 NCBI_THROW(CLoaderException, eNoData,
379 "CDataLoader::GetGi() sequence doesn't have GI");
380 }
381 return gi;
382 }
383
384
GetGiFound(const CSeq_id_Handle & idh)385 CDataLoader::SGiFound CDataLoader::GetGiFound(const CSeq_id_Handle& idh)
386 {
387 // default implementation based on GetGi() and GetIds()
388 SGiFound ret;
389 try {
390 ret.gi = GetGi(idh);
391 ret.sequence_found = ret.gi != ZERO_GI || SequenceExists(idh);
392 }
393 catch ( CLoaderException& exc ) {
394 if ( exc.GetErrCode() == exc.eNotFound ) {
395 // no sequence
396 }
397 else if ( exc.GetErrCode() == exc.eNoData ) {
398 // sequence is known, but there is no GI
399 ret.sequence_found = true;
400 }
401 else {
402 // problem
403 throw;
404 }
405 }
406 return ret;
407 }
408
409
GetLabel(const CSeq_id_Handle & idh)410 string CDataLoader::GetLabel(const CSeq_id_Handle& idh)
411 {
412 // default implementation based on GetIds();
413 TIds ids;
414 GetIds(idh, ids);
415 if ( ids.empty() ) {
416 return string();
417 }
418 return objects::GetLabel(ids);
419 }
420
421
GetTaxId(const CSeq_id_Handle & idh)422 TTaxId CDataLoader::GetTaxId(const CSeq_id_Handle& idh)
423 {
424 // default implementation based on GetRecordsNoBlobState();
425 TTSE_LockSet locks = GetRecordsNoBlobState(idh, eBioseqCore);
426 ITERATE(TTSE_LockSet, it, locks) {
427 CConstRef<CBioseq_Info> bs_info = (*it)->FindMatchingBioseq(idh);
428 if ( bs_info ) {
429 return bs_info->GetTaxId();
430 }
431 }
432 return INVALID_TAX_ID;
433 }
434
435
GetSequenceLength(const CSeq_id_Handle & idh)436 TSeqPos CDataLoader::GetSequenceLength(const CSeq_id_Handle& idh)
437 {
438 // default implementation based on GetRecordsNoBlobState()
439 TTSE_LockSet locks = GetRecordsNoBlobState(idh, eBioseqCore);
440 ITERATE(TTSE_LockSet, it, locks) {
441 CConstRef<CBioseq_Info> bs_info = (*it)->FindMatchingBioseq(idh);
442 if ( bs_info ) {
443 return bs_info->GetBioseqLength();
444 }
445 }
446 return kInvalidSeqPos;
447 }
448
449
GetSequenceType(const CSeq_id_Handle & idh)450 CSeq_inst::TMol CDataLoader::GetSequenceType(const CSeq_id_Handle& idh)
451 {
452 // default implementation based on GetRecordsNoBlobState()
453 TTSE_LockSet locks = GetRecordsNoBlobState(idh, eBioseqCore);
454 ITERATE(TTSE_LockSet, it, locks) {
455 CConstRef<CBioseq_Info> bs_info = (*it)->FindMatchingBioseq(idh);
456 if ( bs_info ) {
457 CSeq_inst::TMol type = bs_info->GetInst_Mol();
458 if ( type == CSeq_inst::eMol_not_set ) {
459 NCBI_THROW(CLoaderException, eNoData,
460 "CDataLoader::GetSequenceType() type not set");
461 }
462 return type;
463 }
464 }
465 NCBI_THROW(CLoaderException, eNotFound,
466 "CDataLoader::GetSequenceType() sequence not found");
467 }
468
469
470 CDataLoader::STypeFound
GetSequenceTypeFound(const CSeq_id_Handle & idh)471 CDataLoader::GetSequenceTypeFound(const CSeq_id_Handle& idh)
472 {
473 // default implementation based on GetSequenceType() and GetIds()
474 STypeFound ret;
475 try {
476 ret.type = GetSequenceType(idh);
477 ret.sequence_found =
478 ret.type != CSeq_inst::eMol_not_set || SequenceExists(idh);
479 }
480 catch ( CLoaderException& exc ) {
481 if ( exc.GetErrCode() == exc.eNotFound ) {
482 // no sequence
483 }
484 else if ( exc.GetErrCode() == exc.eNoData ) {
485 // sequence is known, but there is no type
486 ret.sequence_found = true;
487 }
488 else {
489 // problem
490 throw;
491 }
492 }
493 return ret;
494 }
495
496
GetSequenceState(const CSeq_id_Handle & idh)497 int CDataLoader::GetSequenceState(const CSeq_id_Handle& idh)
498 {
499 try {
500 TTSE_LockSet locks = GetRecords(idh, eBioseqCore);
501 ITERATE(TTSE_LockSet, it, locks) {
502 CConstRef<CBioseq_Info> bs_info = (*it)->FindMatchingBioseq(idh);
503 if ( bs_info ) {
504 return (*it)->GetBlobState();
505 }
506 }
507 return CBioseq_Handle::fState_not_found|CBioseq_Handle::fState_no_data;
508 }
509 catch ( CBlobStateException& exc ) {
510 return exc.GetBlobState();
511 }
512 }
513
514
GetSequenceHash(const CSeq_id_Handle & idh)515 int CDataLoader::GetSequenceHash(const CSeq_id_Handle& idh)
516 {
517 if ( SequenceExists(idh) ) {
518 NCBI_THROW(CLoaderException, eNoData,
519 "CDataLoader::GetSequenceHash() sequence hash not set");
520 }
521 NCBI_THROW(CLoaderException, eNotFound,
522 "CDataLoader::GetSequenceHash() sequence not found");
523 }
524
525
526 CDataLoader::SHashFound
GetSequenceHashFound(const CSeq_id_Handle & idh)527 CDataLoader::GetSequenceHashFound(const CSeq_id_Handle& idh)
528 {
529 // default implementation based on GetSequenceHash() and GetIds()
530 SHashFound ret;
531 try {
532 ret.hash = GetSequenceHash(idh);
533 if ( !ret.hash ) {
534 // hash = 0, we don't know what causes it:
535 // absence of sequence, unknown hash, or the hash happens to be 0.
536 ret.sequence_found = SequenceExists(idh);
537 }
538 else {
539 ret.sequence_found = true;
540 ret.hash_known = true;
541 }
542 }
543 catch ( CLoaderException& exc ) {
544 if ( exc.GetErrCode() == exc.eNotFound ) {
545 // no sequence found
546 }
547 else if ( exc.GetErrCode() == exc.eNoData ) {
548 // sequence exists
549 ret.sequence_found = true;
550 }
551 else {
552 // problem
553 throw;
554 }
555 }
556 return ret;
557 }
558
559
GetAccVers(const TIds & ids,TLoaded & loaded,TIds & ret)560 void CDataLoader::GetAccVers(const TIds& ids, TLoaded& loaded, TIds& ret)
561 {
562 size_t count = ids.size();
563 _ASSERT(ids.size() == loaded.size());
564 _ASSERT(ids.size() == ret.size());
565 TIds seq_ids;
566 for ( size_t i = 0; i < count; ++i ) {
567 if ( loaded[i] ) {
568 continue;
569 }
570 SAccVerFound data = GetAccVerFound(ids[i]);
571 if ( data.sequence_found ) {
572 ret[i] = data.acc_ver;
573 loaded[i] = true;
574 }
575 }
576 }
577
578
GetGis(const TIds & ids,TLoaded & loaded,TGis & ret)579 void CDataLoader::GetGis(const TIds& ids, TLoaded& loaded, TGis& ret)
580 {
581 size_t count = ids.size();
582 _ASSERT(ids.size() == loaded.size());
583 _ASSERT(ids.size() == ret.size());
584 TIds seq_ids;
585 for ( size_t i = 0; i < count; ++i ) {
586 if ( loaded[i] ) {
587 continue;
588 }
589 SGiFound data = GetGiFound(ids[i]);
590 if ( data.sequence_found ) {
591 ret[i] = data.gi;
592 loaded[i] = true;
593 }
594 }
595 }
596
597
GetLabels(const TIds & ids,TLoaded & loaded,TLabels & ret)598 void CDataLoader::GetLabels(const TIds& ids, TLoaded& loaded, TLabels& ret)
599 {
600 size_t count = ids.size();
601 _ASSERT(ids.size() == loaded.size());
602 _ASSERT(ids.size() == ret.size());
603 for ( size_t i = 0; i < count; ++i ) {
604 if ( loaded[i] ) {
605 continue;
606 }
607 string label = GetLabel(ids[i]);
608 if ( !label.empty() ) {
609 ret[i] = label;
610 loaded[i] = true;
611 }
612 }
613 }
614
615
GetTaxIds(const TIds & ids,TLoaded & loaded,TTaxIds & ret)616 void CDataLoader::GetTaxIds(const TIds& ids, TLoaded& loaded, TTaxIds& ret)
617 {
618 size_t count = ids.size();
619 _ASSERT(ids.size() == loaded.size());
620 _ASSERT(ids.size() == ret.size());
621 for ( size_t i = 0; i < count; ++i ) {
622 if ( loaded[i] ) {
623 continue;
624 }
625
626 TTaxId taxid = GetTaxId(ids[i]);
627 if ( taxid != INVALID_TAX_ID ) {
628 ret[i] = taxid;
629 loaded[i] = true;
630 }
631 }
632 }
633
634
GetSequenceLengths(const TIds & ids,TLoaded & loaded,TSequenceLengths & ret)635 void CDataLoader::GetSequenceLengths(const TIds& ids, TLoaded& loaded,
636 TSequenceLengths& ret)
637 {
638 size_t count = ids.size();
639 _ASSERT(ids.size() == loaded.size());
640 _ASSERT(ids.size() == ret.size());
641 for ( size_t i = 0; i < count; ++i ) {
642 if ( loaded[i] ) {
643 continue;
644 }
645
646 TSeqPos len = GetSequenceLength(ids[i]);
647 if ( len != kInvalidSeqPos ) {
648 ret[i] = len;
649 loaded[i] = true;
650 }
651 }
652 }
653
654
GetSequenceTypes(const TIds & ids,TLoaded & loaded,TSequenceTypes & ret)655 void CDataLoader::GetSequenceTypes(const TIds& ids, TLoaded& loaded,
656 TSequenceTypes& ret)
657 {
658 size_t count = ids.size();
659 _ASSERT(ids.size() == loaded.size());
660 _ASSERT(ids.size() == ret.size());
661 for ( size_t i = 0; i < count; ++i ) {
662 if ( loaded[i] ) {
663 continue;
664 }
665
666 STypeFound data = GetSequenceTypeFound(ids[i]);
667 if ( data.sequence_found ) {
668 ret[i] = data.type;
669 loaded[i] = true;
670 }
671 }
672 }
673
674
GetSequenceStates(const TIds & ids,TLoaded & loaded,TSequenceStates & ret)675 void CDataLoader::GetSequenceStates(const TIds& ids, TLoaded& loaded,
676 TSequenceStates& ret)
677 {
678 const int kNotFound = (CBioseq_Handle::fState_not_found |
679 CBioseq_Handle::fState_no_data);
680
681 size_t count = ids.size();
682 _ASSERT(ids.size() == loaded.size());
683 _ASSERT(ids.size() == ret.size());
684 for ( size_t i = 0; i < count; ++i ) {
685 if ( loaded[i] ) {
686 continue;
687 }
688
689 int state = GetSequenceState(ids[i]);
690 if ( state != kNotFound ) {
691 ret[i] = state;
692 loaded[i] = true;
693 }
694 }
695 }
696
697
GetSequenceHashes(const TIds & ids,TLoaded & loaded,TSequenceHashes & ret,THashKnown & known)698 void CDataLoader::GetSequenceHashes(const TIds& ids, TLoaded& loaded,
699 TSequenceHashes& ret, THashKnown& known)
700 {
701 size_t count = ids.size();
702 _ASSERT(ids.size() == loaded.size());
703 _ASSERT(ids.size() == ret.size());
704 for ( size_t i = 0; i < count; ++i ) {
705 if ( loaded[i] ) {
706 continue;
707 }
708
709 SHashFound data = GetSequenceHashFound(ids[i]);
710 if ( data.sequence_found ) {
711 ret[i] = data.hash;
712 loaded[i] = true;
713 known[i] = data.hash_known;
714 }
715 }
716 }
717
718
GetBlobs(TTSE_LockSets & tse_sets)719 void CDataLoader::GetBlobs(TTSE_LockSets& tse_sets)
720 {
721 NON_CONST_ITERATE(TTSE_LockSets, tse_set, tse_sets) {
722 tse_set->second = GetRecords(tse_set->first, eBlob);
723 }
724 }
725
726
727 CDataLoader::EChoice
DetailsToChoice(const SRequestDetails::TAnnotSet & annots) const728 CDataLoader::DetailsToChoice(const SRequestDetails::TAnnotSet& annots) const
729 {
730 EChoice ret = eCore;
731 ITERATE ( SRequestDetails::TAnnotSet, i, annots ) {
732 ITERATE ( SRequestDetails::TAnnotTypesSet, j, i->second ) {
733 EChoice cur = eCore;
734 switch ( j->GetAnnotType() ) {
735 case CSeq_annot::C_Data::e_Ftable:
736 cur = eFeatures;
737 break;
738 case CSeq_annot::C_Data::e_Graph:
739 cur = eGraph;
740 break;
741 case CSeq_annot::C_Data::e_Align:
742 cur = eAlign;
743 break;
744 case CSeq_annot::C_Data::e_not_set:
745 return eAnnot;
746 default:
747 break;
748 }
749 if ( cur != eCore && cur != ret ) {
750 if ( ret != eCore ) return eAnnot;
751 ret = cur;
752 }
753 }
754 }
755 return ret;
756 }
757
758
759 CDataLoader::EChoice
DetailsToChoice(const SRequestDetails & details) const760 CDataLoader::DetailsToChoice(const SRequestDetails& details) const
761 {
762 EChoice ret = DetailsToChoice(details.m_NeedAnnots);
763 switch ( details.m_AnnotBlobType ) {
764 case SRequestDetails::fAnnotBlobNone:
765 // no annotations
766 ret = eCore;
767 break;
768 case SRequestDetails::fAnnotBlobInternal:
769 // no change
770 break;
771 case SRequestDetails::fAnnotBlobExternal:
772 // shift from internal to external annotations
773 _ASSERT(ret >= eFeatures && ret <= eAnnot);
774 ret = EChoice(ret + eExtFeatures - eFeatures);
775 _ASSERT(ret >= eExtFeatures && ret <= eExtAnnot);
776 break;
777 case SRequestDetails::fAnnotBlobOrphan:
778 // all orphan annots
779 ret = eOrphanAnnot;
780 break;
781 default:
782 // all other cases -> eAll
783 ret = eAll;
784 break;
785 }
786 if ( !details.m_NeedSeqMap.Empty() || !details.m_NeedSeqData.Empty() ) {
787 // include sequence
788 if ( ret == eCore ) {
789 ret = eSequence;
790 }
791 else if ( ret >= eFeatures && ret <= eAnnot ) {
792 // only internal annot + sequence -> whole blob
793 ret = eBlob;
794 }
795 else {
796 // all blobs
797 ret = eAll;
798 }
799 }
800 return ret;
801 }
802
803
ChoiceToDetails(EChoice choice) const804 SRequestDetails CDataLoader::ChoiceToDetails(EChoice choice) const
805 {
806 SRequestDetails details;
807 CSeq_annot::C_Data::E_Choice type = CSeq_annot::C_Data::e_not_set;
808 bool sequence = false;
809 switch ( choice ) {
810 case eAll:
811 sequence = true;
812 // from all blobs
813 details.m_AnnotBlobType = SRequestDetails::fAnnotBlobAll;
814 break;
815 case eBlob:
816 case eBioseq:
817 case eBioseqCore:
818 sequence = true;
819 // internal only
820 details.m_AnnotBlobType = SRequestDetails::fAnnotBlobInternal;
821 break;
822 case eSequence:
823 sequence = true;
824 break;
825 case eAnnot:
826 // internal only
827 details.m_AnnotBlobType = SRequestDetails::fAnnotBlobInternal;
828 break;
829 case eGraph:
830 type = CSeq_annot::C_Data::e_Graph;
831 // internal only
832 details.m_AnnotBlobType = SRequestDetails::fAnnotBlobInternal;
833 break;
834 case eFeatures:
835 type = CSeq_annot::C_Data::e_Ftable;
836 // internal only
837 details.m_AnnotBlobType = SRequestDetails::fAnnotBlobInternal;
838 break;
839 case eAlign:
840 type = CSeq_annot::C_Data::e_Align;
841 // internal only
842 details.m_AnnotBlobType = SRequestDetails::fAnnotBlobInternal;
843 break;
844 case eExtAnnot:
845 // external only
846 details.m_AnnotBlobType = SRequestDetails::fAnnotBlobExternal;
847 break;
848 case eExtGraph:
849 type = CSeq_annot::C_Data::e_Graph;
850 // external only
851 details.m_AnnotBlobType = SRequestDetails::fAnnotBlobExternal;
852 break;
853 case eExtFeatures:
854 type = CSeq_annot::C_Data::e_Ftable;
855 // external only
856 details.m_AnnotBlobType = SRequestDetails::fAnnotBlobExternal;
857 break;
858 case eExtAlign:
859 type = CSeq_annot::C_Data::e_Align;
860 // external only
861 details.m_AnnotBlobType = SRequestDetails::fAnnotBlobExternal;
862 break;
863 case eOrphanAnnot:
864 // orphan annotations only
865 details.m_AnnotBlobType = SRequestDetails::fAnnotBlobOrphan;
866 break;
867 default:
868 break;
869 }
870 if ( sequence ) {
871 details.m_NeedSeqMap = SRequestDetails::TRange::GetWhole();
872 details.m_NeedSeqData = SRequestDetails::TRange::GetWhole();
873 }
874 if ( details.m_AnnotBlobType != SRequestDetails::fAnnotBlobNone ) {
875 details.m_NeedAnnots[CAnnotName()].insert(SAnnotTypeSelector(type));
876 }
877 return details;
878 }
879
880
GetChunk(TChunk)881 void CDataLoader::GetChunk(TChunk /*chunk_info*/)
882 {
883 NCBI_THROW(CLoaderException, eNotImplemented,
884 "CDataLoader::GetChunk() is not implemented in subclass");
885 }
886
887
GetChunks(const TChunkSet & chunks)888 void CDataLoader::GetChunks(const TChunkSet& chunks)
889 {
890 ITERATE ( TChunkSet, it, chunks ) {
891 GetChunk(*it);
892 }
893 }
894
895
896 CDataLoader::TTSE_Lock
ResolveConflict(const CSeq_id_Handle &,const TTSE_LockSet &)897 CDataLoader::ResolveConflict(const CSeq_id_Handle& /*id*/,
898 const TTSE_LockSet& /*tse_set*/)
899 {
900 return TTSE_Lock();
901 }
902
903
GetBlobId(const CSeq_id_Handle &)904 CDataLoader::TBlobId CDataLoader::GetBlobId(const CSeq_id_Handle& /*sih*/)
905 {
906 return TBlobId();
907 }
908
909
GetBlobVersion(const TBlobId &)910 CDataLoader::TBlobVersion CDataLoader::GetBlobVersion(const TBlobId& /*id*/)
911 {
912 return 0;
913 }
914
GetEditSaver() const915 CDataLoader::TEditSaver CDataLoader::GetEditSaver() const
916 {
917 return TEditSaver();
918 }
919
920
GetDefaultPriority(void) const921 CObjectManager::TPriority CDataLoader::GetDefaultPriority(void) const
922 {
923 return CObjectManager::kPriority_Loader;
924 }
925
926
EstimateLoadBytes(const CTSE_Chunk_Info &) const927 Uint4 CDataLoader::EstimateLoadBytes(const CTSE_Chunk_Info& /*chunk*/) const
928 {
929 return 32000; // assume 32KB chunk size
930 }
931
932
EstimateLoadSeconds(const CTSE_Chunk_Info &,Uint4 bytes) const933 double CDataLoader::EstimateLoadSeconds(const CTSE_Chunk_Info& /*chunk*/, Uint4 bytes) const
934 {
935 return bytes*1e-7+0.001; // assume 10MB/s transfer speed and 1ms overhead
936 }
937
938
GetDefaultBlobCacheSizeLimit(void) const939 unsigned CDataLoader::GetDefaultBlobCacheSizeLimit(void) const
940 {
941 return kMax_UInt;
942 }
943
944
945 /////////////////////////////////////////////////////////////////////////////
946 // CBlobId
947
~CBlobId(void)948 CBlobId::~CBlobId(void)
949 {
950 }
951
LessByTypeId(const CBlobId & id2) const952 bool CBlobId::LessByTypeId(const CBlobId& id2) const
953 {
954 return typeid(*this).before(typeid(id2));
955 }
956
operator ==(const CBlobId & id) const957 bool CBlobId::operator==(const CBlobId& id) const
958 {
959 return !(*this < id || id < *this);
960 }
961
962
963 END_SCOPE(objects)
964 END_NCBI_SCOPE
965