1 /* $Id: vdbgraphloader_impl.cpp 595665 2019-10-24 17:13:35Z vasilche $
2 * ===========================================================================
3 *
4 * PUBLIC DOMAIN NOTICE
5 * National Center for Biotechnology Information
6 *
7 * This software/database is a "United States Government Work" under the
8 * terms of the United States Copyright Act. It was written as part of
9 * the author's official duties as a United States Government employee and
10 * thus cannot be copyrighted. This software/database is freely available
11 * to the public for use. The National Library of Medicine and the U.S.
12 * Government have not placed any restriction on its use or reproduction.
13 *
14 * Although all reasonable efforts have been taken to ensure the accuracy
15 * and reliability of the software and data, the NLM and the U.S.
16 * Government do not and cannot warrant the performance or results that
17 * may be obtained by using this software or data. The NLM and the U.S.
18 * Government disclaim all warranties, express or implied, including
19 * warranties of performance, merchantability or fitness for any particular
20 * purpose.
21 *
22 * Please cite the author in any work or product based on this material.
23 *
24 * ===========================================================================
25 *
26 * Author: Eugene Vasilchenko
27 *
28 * File Description: data loader for VDB graph data
29 *
30 * ===========================================================================
31 */
32
33 #include <ncbi_pch.hpp>
34 #include <corelib/ncbistd.hpp>
35
36 #include <objects/general/general__.hpp>
37 #include <objects/seqloc/Seq_id.hpp>
38 #include <objects/seq/seq__.hpp>
39 #include <objects/seqres/seqres__.hpp>
40
41 #include <objmgr/annot_selector.hpp>
42 #include <objmgr/impl/data_source.hpp>
43 #include <objmgr/impl/tse_loadlock.hpp>
44 #include <objmgr/impl/tse_split_info.hpp>
45 #include <objmgr/impl/tse_chunk_info.hpp>
46 #include <objmgr/data_loader_factory.hpp>
47 #include <corelib/plugin_manager_impl.hpp>
48 #include <corelib/plugin_manager_store.hpp>
49
50 #include <sra/readers/sra/graphread.hpp>
51 #include <sra/data_loaders/vdbgraph/vdbgraphloader.hpp>
52 #include <sra/data_loaders/vdbgraph/impl/vdbgraphloader_impl.hpp>
53 #include <sra/error_codes.hpp>
54
55 BEGIN_NCBI_SCOPE
56
57 #define NCBI_USE_ERRCODE_X VDBGraphLoader
58 NCBI_DEFINE_ERR_SUBCODE_X(8);
59
60 class CObject;
61
62 BEGIN_SCOPE(objects)
63
64 class CDataLoader;
65
66 #define OVERVIEW_NAME_SUFFIX "@@5000"
67 #define MID_ZOOM_NAME_SUFFIX "@@100"
68
69 static const TSeqPos kOverviewChunkSize = 20000*5000;
70 static const TSeqPos kMidZoomChunkSize = 20000*100;
71 static const TSeqPos kMainChunkSize = 100000;
72
73 static const size_t kOverviewChunkIdAdd = 0;
74 static const size_t kMidZoomChunkIdAdd = 1;
75 static const size_t kMainGraphChunkIdAdd = 2;
76 static const size_t kMainTableChunkIdAdd = 3;
77 static const size_t kChunkIdMul = 4;
78
79 static const int kTSEId = 1;
80
81 NCBI_PARAM_DECL(int, VDBGRAPH_LOADER, DEBUG);
82 NCBI_PARAM_DEF_EX(int, VDBGRAPH_LOADER, DEBUG, 0,
83 eParam_NoThread, VDBGRAPH_LOADER_DEBUG);
84
GetDebugLevel(void)85 static int GetDebugLevel(void)
86 {
87 static CSafeStatic<NCBI_PARAM_TYPE(VDBGRAPH_LOADER, DEBUG)> s_Value;
88 return s_Value->Get();
89 }
90
91
92 NCBI_PARAM_DECL(size_t, VDBGRAPH_LOADER, GC_SIZE);
93 NCBI_PARAM_DEF_EX(size_t, VDBGRAPH_LOADER, GC_SIZE, 10,
94 eParam_NoThread, VDBGRAPH_LOADER_GC_SIZE);
95
GetGCSize(void)96 static size_t GetGCSize(void)
97 {
98 static CSafeStatic<NCBI_PARAM_TYPE(VDBGRAPH_LOADER, GC_SIZE)> s_Value;
99 return s_Value->Get();
100 }
101
102
103 NCBI_PARAM_DECL(size_t, VDBGRAPH_LOADER, MISSING_GC_SIZE);
104 NCBI_PARAM_DEF_EX(size_t, VDBGRAPH_LOADER, MISSING_GC_SIZE, 10000,
105 eParam_NoThread, VDBGRAPH_LOADER_MISSING_GC_SIZE);
106
GetMissingGCSize(void)107 static size_t GetMissingGCSize(void)
108 {
109 static CSafeStatic<NCBI_PARAM_TYPE(VDBGRAPH_LOADER, MISSING_GC_SIZE)> s_Value;
110 return s_Value->Get();
111 }
112
113
114 NCBI_PARAM_DECL(int, VDBGRAPH_LOADER, USE_TABLE);
115 NCBI_PARAM_DEF_EX(int, VDBGRAPH_LOADER, USE_TABLE, 2,
116 eParam_NoThread, VDBGRAPH_LOADER_USE_TABLE);
117
GetUseTable(void)118 static int GetUseTable(void)
119 {
120 static CSafeStatic<NCBI_PARAM_TYPE(VDBGRAPH_LOADER, USE_TABLE)> s_Value;
121 return s_Value->Get();
122 }
123
124
125 NCBI_PARAM_DECL(bool, VDBGRAPH_LOADER, DISABLE_ZOOM);
126 NCBI_PARAM_DEF_EX(bool, VDBGRAPH_LOADER, DISABLE_ZOOM, false,
127 eParam_NoThread, VDBGRAPH_LOADER_DISABLE_ZOOM);
128
GetDisabledZoom(void)129 static bool GetDisabledZoom(void)
130 {
131 static CSafeStatic<NCBI_PARAM_TYPE(VDBGRAPH_LOADER, DISABLE_ZOOM)> s_Value;
132 return s_Value->Get();
133 }
134
135
GetEnabledOverview(void)136 static bool GetEnabledOverview(void)
137 {
138 return !GetDisabledZoom();
139 }
140
141
GetEnabledMidZoom(void)142 static bool GetEnabledMidZoom(void)
143 {
144 return !GetDisabledZoom();
145 }
146
147
148 NCBI_PARAM_DECL(int, VDBGRAPH_LOADER, LOOKUP_TYPE);
149 NCBI_PARAM_DEF_EX(int, VDBGRAPH_LOADER, LOOKUP_TYPE, CVDBGraphDb_Impl::eLookupDefault,
150 eParam_NoThread, VDBGRAPH_LOADER_LOOKUP_TYPE);
151
GetLookupType(void)152 static CVDBGraphDb_Impl::ELookupType GetLookupType(void)
153 {
154 static int lookup_type = NCBI_PARAM_TYPE(VDBGRAPH_LOADER, LOOKUP_TYPE)::GetDefault();
155 return CVDBGraphDb_Impl::ELookupType(lookup_type);
156 }
157
158
159 /////////////////////////////////////////////////////////////////////////////
160 // CVDBGraphBlobId
161 /////////////////////////////////////////////////////////////////////////////
162
163 class CVDBGraphBlobId : public CBlobId
164 {
165 public:
166 CVDBGraphBlobId(const string& file, const CSeq_id_Handle& id);
167 ~CVDBGraphBlobId(void);
168
169 string m_VDBFile;
170 CSeq_id_Handle m_SeqId;
171 CRef<CVDBGraphDataLoader_Impl::SVDBFileInfo> m_FileInfo;
172
173 string ToString(void) const;
174 CVDBGraphBlobId(const string& str);
175
176 bool operator<(const CBlobId& id) const;
177 bool operator==(const CBlobId& id) const;
178 };
179
180
CVDBGraphBlobId(const string & file,const CSeq_id_Handle & id)181 CVDBGraphBlobId::CVDBGraphBlobId(const string& file, const CSeq_id_Handle& id)
182 : m_VDBFile(file),
183 m_SeqId(id)
184 {
185 }
186
187
~CVDBGraphBlobId(void)188 CVDBGraphBlobId::~CVDBGraphBlobId(void)
189 {
190 }
191
192
ToString(void) const193 string CVDBGraphBlobId::ToString(void) const
194 {
195 CNcbiOstrstream out;
196 out << m_VDBFile << '\0' << m_SeqId;
197 return CNcbiOstrstreamToString(out);
198 }
199
200
CVDBGraphBlobId(const string & str)201 CVDBGraphBlobId::CVDBGraphBlobId(const string& str)
202 {
203 SIZE_TYPE pos1 = str.find('\0');
204 m_VDBFile = str.substr(0, pos1);
205 m_SeqId = CSeq_id_Handle::GetHandle(str.substr(pos1+1));
206 }
207
208
operator <(const CBlobId & id) const209 bool CVDBGraphBlobId::operator<(const CBlobId& id) const
210 {
211 const CVDBGraphBlobId& sra2 = dynamic_cast<const CVDBGraphBlobId&>(id);
212 return m_SeqId < sra2.m_SeqId ||
213 (m_SeqId == sra2.m_SeqId && m_VDBFile < sra2.m_VDBFile);
214 }
215
216
operator ==(const CBlobId & id) const217 bool CVDBGraphBlobId::operator==(const CBlobId& id) const
218 {
219 const CVDBGraphBlobId& sra2 = dynamic_cast<const CVDBGraphBlobId&>(id);
220 return m_SeqId == sra2.m_SeqId && m_VDBFile == sra2.m_VDBFile;
221 }
222
223
224 /////////////////////////////////////////////////////////////////////////////
225 // CVDBGraphDataLoader_Impl
226 /////////////////////////////////////////////////////////////////////////////
227
228
CVDBGraphDataLoader_Impl(const TVDBFiles & vdb_files)229 CVDBGraphDataLoader_Impl::CVDBGraphDataLoader_Impl(const TVDBFiles& vdb_files)
230 : m_AutoFileMap(GetGCSize()),
231 m_MissingFileSet(GetMissingGCSize())
232 {
233 ITERATE ( TVDBFiles, it, vdb_files ) {
234 if ( GetDebugLevel() >= 2 ) {
235 LOG_POST_X(1, "CVDBGraphDataLoader: opening explicit file "<<*it);
236 }
237 CRef<SVDBFileInfo> info(new SVDBFileInfo);
238 info->m_VDBFile = *it;
239 info->m_BaseAnnotName = CDirEntry(*it).GetName();
240 info->m_VDB = CVDBGraphDb(m_Mgr, *it, GetLookupType());
241 m_FixedFileMap[*it] = info;
242 }
243 }
244
245
~CVDBGraphDataLoader_Impl(void)246 CVDBGraphDataLoader_Impl::~CVDBGraphDataLoader_Impl(void)
247 {
248 }
249
250
ContainsAnnotsFor(const CSeq_id_Handle & id) const251 bool CVDBGraphDataLoader_Impl::SVDBFileInfo::ContainsAnnotsFor(const CSeq_id_Handle& id) const
252 {
253 return CVDBGraphSeqIterator(m_VDB, id);
254 }
255
256
GetMainAnnotName(void) const257 string CVDBGraphDataLoader_Impl::SVDBFileInfo::GetMainAnnotName(void) const
258 {
259 return m_BaseAnnotName;
260 }
261
262
GetOverviewAnnotName(void) const263 string CVDBGraphDataLoader_Impl::SVDBFileInfo::GetOverviewAnnotName(void) const
264 {
265 return m_BaseAnnotName+OVERVIEW_NAME_SUFFIX;
266 }
267
268
GetMidZoomAnnotName(void) const269 string CVDBGraphDataLoader_Impl::SVDBFileInfo::GetMidZoomAnnotName(void) const
270 {
271 return m_BaseAnnotName+MID_ZOOM_NAME_SUFFIX;
272 }
273
274
GetDefaultPriority(void) const275 CObjectManager::TPriority CVDBGraphDataLoader_Impl::GetDefaultPriority(void) const
276 {
277 CObjectManager::TPriority priority = CObjectManager::kPriority_Extra;
278 if ( m_FixedFileMap.empty() ) {
279 // implicit loading data loader has lower priority by default
280 priority += 1;
281 }
282 return priority;
283 }
284
285
286 CVDBGraphDataLoader_Impl::TAnnotNames
GetPossibleAnnotNames(void) const287 CVDBGraphDataLoader_Impl::GetPossibleAnnotNames(void) const
288 {
289 TAnnotNames names;
290 ITERATE ( TFixedFileMap, it, m_FixedFileMap ) {
291 const SVDBFileInfo& info = *it->second;
292 names.push_back(CAnnotName(info.GetMainAnnotName()));
293 names.push_back(CAnnotName(info.GetMidZoomAnnotName()));
294 names.push_back(CAnnotName(info.GetOverviewAnnotName()));
295 }
296 sort(names.begin(), names.end());
297 names.erase(unique(names.begin(), names.end()), names.end());
298 return names;
299 }
300
301
302 CDataLoader::TBlobId
GetBlobId(const CSeq_id_Handle &)303 CVDBGraphDataLoader_Impl::GetBlobId(const CSeq_id_Handle& /*idh*/)
304 {
305 // no blobs with sequence
306 return CDataLoader::TBlobId();
307 }
308
309
310 CDataLoader::TBlobId
GetBlobIdFromString(const string & str) const311 CVDBGraphDataLoader_Impl::GetBlobIdFromString(const string& str) const
312 {
313 return CDataLoader::TBlobId(new CVDBGraphBlobId(str));
314 }
315
316
317 CDataLoader::TTSE_Lock
GetBlobById(CDataSource * ds,const CDataLoader::TBlobId & blob_id0)318 CVDBGraphDataLoader_Impl::GetBlobById(CDataSource* ds,
319 const CDataLoader::TBlobId& blob_id0)
320 {
321 CTSE_LoadLock load_lock = ds->GetTSE_LoadLock(blob_id0);
322 if ( !load_lock.IsLoaded() ) {
323 const CVDBGraphBlobId& blob_id =
324 dynamic_cast<const CVDBGraphBlobId&>(*blob_id0);
325 if ( 1 ) {
326 LoadSplitEntry(*load_lock, blob_id);
327 }
328 else {
329 load_lock->SetSeq_entry(*LoadFullEntry(blob_id));
330 }
331 load_lock.SetLoaded();
332 }
333 return load_lock;
334 }
335
336
337 CDataLoader::TTSE_LockSet
GetRecords(CDataSource * ds,const CSeq_id_Handle & id,CDataLoader::EChoice choice)338 CVDBGraphDataLoader_Impl::GetRecords(CDataSource* ds,
339 const CSeq_id_Handle& id,
340 CDataLoader::EChoice choice)
341 {
342 if ( choice == CDataLoader::eOrphanAnnot ||
343 choice == CDataLoader::eAll ) {
344 return GetOrphanAnnotRecords(ds, id, 0, 0);
345 }
346 return TTSE_LockSet();
347 }
348
349
350 CDataLoader::TTSE_LockSet
GetOrphanAnnotRecords(CDataSource * ds,const CSeq_id_Handle & id,const SAnnotSelector * sel,CDataLoader::TProcessedNAs * processed_nas)351 CVDBGraphDataLoader_Impl::GetOrphanAnnotRecords(CDataSource* ds,
352 const CSeq_id_Handle& id,
353 const SAnnotSelector* sel,
354 CDataLoader::TProcessedNAs* processed_nas)
355 {
356 TTSE_LockSet locks;
357 // explicitly specified files
358 for ( auto& it : m_FixedFileMap ) {
359 if ( !sel || CDataLoader::IsRequestedNA(it.second->m_BaseAnnotName, sel) ) {
360 CDataLoader::SetProcessedNA(it.second->m_BaseAnnotName, processed_nas);
361 if ( it.second->ContainsAnnotsFor(id) ) {
362 TBlobId blob_id(new CVDBGraphBlobId(it.second->m_VDBFile, id));
363 locks.insert(GetBlobById(ds, blob_id));
364 }
365 }
366 }
367 // implicitly load NA accessions
368 if ( m_FixedFileMap.empty() && CDataLoader::IsRequestedAnyNA(sel) ) {
369 const SAnnotSelector::TNamedAnnotAccessions& accs =
370 sel->GetNamedAnnotAccessions();
371 if ( m_AutoFileMap.get_size_limit() < accs.size() ) {
372 // increase VDB cache size
373 m_AutoFileMap.set_size_limit(accs.size()+GetGCSize());
374 }
375 if ( m_MissingFileSet.get_size_limit() < accs.size() ) {
376 // increase VDB cache size
377 m_MissingFileSet.set_size_limit(accs.size()+GetMissingGCSize());
378 }
379 ITERATE ( SAnnotSelector::TNamedAnnotAccessions, it, accs ) {
380 if ( 1 ) {
381 TBlobId blob_id(new CVDBGraphBlobId(it->first, id));
382 if ( CTSE_LoadLock lock = ds->GetTSE_LoadLockIfLoaded(blob_id) ) {
383 CDataLoader::SetProcessedNA(it->first, processed_nas);
384 locks.insert(GetBlobById(ds, blob_id));
385 continue;
386 }
387 }
388 SVDBFileInfo* file = x_GetNAFileInfo(it->first);
389 if ( file ) {
390 CDataLoader::SetProcessedNA(it->first, processed_nas);
391 if ( file->ContainsAnnotsFor(id) ) {
392 TBlobId blob_id(new CVDBGraphBlobId(file->m_VDBFile, id));
393 locks.insert(GetBlobById(ds, blob_id));
394 }
395 }
396 }
397 }
398 return locks;
399 }
400
401
402 CRef<CSeq_entry>
LoadFullEntry(const CVDBGraphBlobId & blob_id)403 CVDBGraphDataLoader_Impl::LoadFullEntry(const CVDBGraphBlobId& blob_id)
404 {
405 if ( GetDebugLevel() >= 5 ) {
406 LOG_POST_X(4, "CVDBGraphDataLoader: "
407 "loading full entry for "<<blob_id.m_SeqId);
408 }
409 CRef<SVDBFileInfo> info_ref = x_GetFileInfo(blob_id.m_VDBFile);
410 SVDBFileInfo& info = *info_ref;
411 CVDBGraphSeqIterator it(info.m_VDB, blob_id.m_SeqId);
412 if ( !it ) {
413 return null;
414 }
415 CRef<CSeq_entry> entry(new CSeq_entry);
416 entry->SetSet().SetSeq_set();
417 CRange<TSeqPos> range = CRange<TSeqPos>::GetWhole();
418 CBioseq_set::TAnnot& dst = entry->SetSet().SetAnnot();
419 CVDBGraphSeqIterator::TContentFlags overview_flags = it.fGraphQAll;
420 CVDBGraphSeqIterator::TContentFlags mid_zoom_flags = it.fGraphZoomQAll;
421 CVDBGraphSeqIterator::TContentFlags main_flags = it.fGraphMain;
422 if ( GetUseTable() == 2 ||
423 (GetUseTable() == 1 && it.SeqTableIsSmaller(range)) ) {
424 main_flags |= it.fGraphMainAsTable;
425 }
426 if ( GetEnabledOverview() ) {
427 dst.push_back(it.GetAnnot(range,
428 info.GetMainAnnotName(),
429 overview_flags));
430 }
431 if ( GetEnabledMidZoom() && info.m_VDB->HasMidZoomGraphs() ) {
432 dst.push_back(it.GetAnnot(range,
433 info.GetMainAnnotName(),
434 mid_zoom_flags));
435 }
436 dst.push_back(it.GetAnnot(range,
437 info.GetMainAnnotName(),
438 main_flags));
439 return entry;
440 }
441
442
LoadSplitEntry(CTSE_Info & tse,const CVDBGraphBlobId & blob_id)443 void CVDBGraphDataLoader_Impl::LoadSplitEntry(CTSE_Info& tse,
444 const CVDBGraphBlobId& blob_id)
445 {
446 if ( GetDebugLevel() >= 5 ) {
447 LOG_POST_X(5, "CVDBGraphDataLoader: "
448 "loading split entry for "<<blob_id.m_SeqId);
449 }
450 CRef<SVDBFileInfo> info_ref = x_GetFileInfo(blob_id.m_VDBFile);
451 const_cast<CVDBGraphBlobId&>(blob_id).m_FileInfo = info_ref;
452 SVDBFileInfo& info = *info_ref;
453 CVDBGraphSeqIterator it(info.m_VDB, blob_id.m_SeqId);
454 if ( !it ) {
455 return;
456 }
457 CRef<CSeq_entry> entry(new CSeq_entry);
458 entry->SetSet().SetSeq_set();
459 entry->SetSet().SetId().SetId(kTSEId);
460 tse.SetSeq_entry(*entry);
461 TSeqPos length = it.GetSeqLength();
462 size_t kIdAdd[3] = {
463 kOverviewChunkIdAdd,
464 kMidZoomChunkIdAdd,
465 kMainGraphChunkIdAdd
466 };
467 if ( GetUseTable() == 2 ||
468 (GetUseTable() == 1 &&
469 it.SeqTableIsSmaller(CRange<TSeqPos>::GetWhole())) ) {
470 kIdAdd[2] = kMainTableChunkIdAdd;
471 }
472 static const TSeqPos kSize[3] = {
473 kOverviewChunkSize,
474 kMidZoomChunkSize,
475 kMainChunkSize
476 };
477 CAnnotName kName[3] = {
478 info.GetOverviewAnnotName(),
479 info.GetMidZoomAnnotName(),
480 info.GetMainAnnotName()
481 };
482 CTSE_Split_Info& split_info = tse.GetSplitInfo();
483 CTSE_Chunk_Info::TPlace place(CSeq_id_Handle(), kTSEId);
484 for ( int k = 0; k < 3; ++k ) {
485 if ( kIdAdd[k] == kOverviewChunkIdAdd &&
486 !GetEnabledOverview() ) {
487 continue;
488 }
489 if ( kIdAdd[k] == kMidZoomChunkIdAdd &&
490 !(GetEnabledMidZoom() && info.m_VDB->HasMidZoomGraphs()) ) {
491 continue;
492 }
493 for ( int i = 0; i*kSize[k] < length; ++i ) {
494 TSeqPos from = i*kSize[k], to_open = min(length, from+kSize[k]);
495 COpenRange<TSeqPos> range(from, to_open);
496 size_t id_add = kIdAdd[k];
497 CSeq_annot::TData::E_Choice type = CSeq_annot::C_Data::e_Graph;
498 if ( id_add == kMainTableChunkIdAdd ) {
499 type = CSeq_annot::C_Data::e_Seq_table;
500 }
501 int chunk_id = int(i*kChunkIdMul+id_add);
502 CRef<CTSE_Chunk_Info> chunk(new CTSE_Chunk_Info(chunk_id));
503 chunk->x_AddAnnotType(kName[k], type, it.GetSeq_id_Handle(), range);
504 chunk->x_AddAnnotPlace(place);
505 split_info.AddChunk(*chunk);
506 }
507 }
508 }
509
510
GetChunk(CTSE_Chunk_Info & chunk)511 void CVDBGraphDataLoader_Impl::GetChunk(CTSE_Chunk_Info& chunk)
512 {
513 const CVDBGraphBlobId& blob_id =
514 dynamic_cast<const CVDBGraphBlobId&>(*chunk.GetBlobId());
515 CRef<SVDBFileInfo> info_ref = blob_id.m_FileInfo;
516 if ( !info_ref ) {
517 info_ref = x_GetFileInfo(blob_id.m_VDBFile);
518 }
519 SVDBFileInfo& info = *info_ref;
520 CVDBGraphSeqIterator it(info.m_VDB, blob_id.m_SeqId);
521 if ( !it ) {
522 return;
523 }
524 TSeqPos length = it.GetSeqLength();
525
526 static const TSeqPos kSize[kChunkIdMul] = {
527 kOverviewChunkSize,
528 kMidZoomChunkSize,
529 kMainChunkSize,
530 kMainChunkSize
531 };
532 static const CVDBGraphSeqIterator::TContentFlags kFlags[kChunkIdMul] = {
533 CVDBGraphSeqIterator::fGraphQAll,
534 CVDBGraphSeqIterator::fGraphZoomQAll,
535 CVDBGraphSeqIterator::fGraphMain,
536 CVDBGraphSeqIterator::fGraphMain|CVDBGraphSeqIterator::fGraphMainAsTable
537 };
538 static const char* const kTypeName[kChunkIdMul] = {
539 "overview",
540 "mid-zoom",
541 "main graph",
542 "main table"
543 };
544 string name = info.GetMainAnnotName();
545 int k = chunk.GetChunkId()%kChunkIdMul;
546 int i = chunk.GetChunkId()/kChunkIdMul;
547 TSeqPos from = i*kSize[k], to_open = min(length, from+kSize[k]);
548 if ( GetDebugLevel() >= 6 ) {
549 LOG_POST_X(6, "CVDBGraphDataLoader: "
550 "loading "<<kTypeName[k]<<" chunk "<<blob_id.m_SeqId<<
551 " @ "<<from<<"-"<<(to_open-1));
552 }
553 CVDBGraphSeqIterator::TContentFlags flags = kFlags[k];
554 CRef<CSeq_annot> annot =
555 it.GetAnnot(COpenRange<TSeqPos>(from, to_open), name, flags);
556 if ( GetDebugLevel() >= 6 ) {
557 LOG_POST_X(7, "CVDBGraphDataLoader: "
558 "loaded "<<kTypeName[k]<<" chunk "<<blob_id.m_SeqId<<
559 " @ "<<from<<"-"<<(to_open-1)<<": "<<MSerial_AsnText<<*annot);
560 }
561 CTSE_Chunk_Info::TPlace place(CSeq_id_Handle(), kTSEId);
562 chunk.x_LoadAnnot(place, *annot);
563 chunk.SetLoaded();
564 }
565
566
sx_IsNA(const string & s)567 static bool sx_IsNA(const string& s)
568 {
569 // NA%09d.%d
570 if ( s.size() < 13 ) {
571 return false;
572 }
573 if ( s[0] != 'N' || s[1] != 'A' || s[11] != '.' ) {
574 return false;
575 }
576 for ( int i = 0; i < 9; ++i ) {
577 if ( !isdigit(s[i+2]&0xff) ) {
578 return false;
579 }
580 }
581 return NStr::StringToNonNegativeInt(s.substr(12)) > 0;
582 }
583
584
585 CRef<CVDBGraphDataLoader_Impl::SVDBFileInfo>
x_GetNAFileInfo(const string & na_acc)586 CVDBGraphDataLoader_Impl::x_GetNAFileInfo(const string& na_acc)
587 {
588 if ( !m_FixedFileMap.empty() ) {
589 return null;
590 }
591 if ( !sx_IsNA(na_acc) ) {
592 return null;
593 }
594 CMutexGuard guard(m_Mutex);
595 TMissingFileSet::iterator it2 = m_MissingFileSet.find(na_acc);
596 if ( it2 != m_MissingFileSet.end() ) {
597 return null;
598 }
599 TAutoFileMap::iterator it = m_AutoFileMap.find(na_acc);
600 if ( it != m_AutoFileMap.end() ) {
601 return it->second;
602 }
603 CRef<SVDBFileInfo> info(new SVDBFileInfo);
604 info->m_VDBFile = na_acc;
605 info->m_BaseAnnotName = na_acc;
606 try {
607 if ( GetDebugLevel() >= 2 ) {
608 LOG_POST_X(2, "CVDBGraphDataLoader: auto-opening file "<<na_acc);
609 }
610 info->m_VDB = CVDBGraphDb(m_Mgr, na_acc, GetLookupType());
611 }
612 catch ( CSraException& exc ) {
613 if ( exc.GetErrCode() != exc.eNotFoundDb ) {
614 throw;
615 }
616 if ( GetDebugLevel() >= 2 ) {
617 LOG_POST_X(3, "CVDBGraphDataLoader: accession not found: "<<na_acc);
618 }
619 m_MissingFileSet[na_acc] = true;
620 return null;
621 }
622 m_AutoFileMap[na_acc] = info;
623 return info;
624 }
625
626
627 CRef<CVDBGraphDataLoader_Impl::SVDBFileInfo>
x_GetFileInfo(const string & name)628 CVDBGraphDataLoader_Impl::x_GetFileInfo(const string& name)
629 {
630 TFixedFileMap::iterator it = m_FixedFileMap.find(name);
631 if ( it != m_FixedFileMap.end() ) {
632 return it->second;
633 }
634 return x_GetNAFileInfo(name);
635 }
636
637
638 END_SCOPE(objects)
639 END_NCBI_SCOPE
640