1 /*  $Id: csraloader.cpp 610971 2020-06-26 12:57:19Z grichenk $
2  * ===========================================================================
3  *
4  *                            PUBLIC DOMAIN NOTICE
5  *               National Center for Biotechnology Information
6  *
7  *  This software/database is a "United States Government Work" under the
8  *  terms of the United States Copyright Act.  It was written as part of
9  *  the author's official duties as a United States Government employee and
10  *  thus cannot be copyrighted.  This software/database is freely available
11  *  to the public for use. The National Library of Medicine and the U.S.
12  *  Government have not placed any restriction on its use or reproduction.
13  *
14  *  Although all reasonable efforts have been taken to ensure the accuracy
15  *  and reliability of the software and data, the NLM and the U.S.
16  *  Government do not and cannot warrant the performance or results that
17  *  may be obtained by using this software or data. The NLM and the U.S.
18  *  Government disclaim all warranties, express or implied, including
19  *  warranties of performance, merchantability or fitness for any particular
20  *  purpose.
21  *
22  *  Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Author: Eugene Vasilchenko
27  *
28  * File Description: CSRA file data loader
29  *
30  * ===========================================================================
31  */
32 
33 #include <ncbi_pch.hpp>
34 #include <corelib/ncbistd.hpp>
35 
36 #include <objects/general/general__.hpp>
37 #include <objects/seqloc/Seq_id.hpp>
38 #include <objects/seq/seq__.hpp>
39 #include <objects/seqres/seqres__.hpp>
40 
41 #include <objmgr/impl/data_source.hpp>
42 #include <objmgr/impl/tse_loadlock.hpp>
43 #include <objmgr/impl/tse_chunk_info.hpp>
44 #include <objmgr/data_loader_factory.hpp>
45 #include <corelib/plugin_manager_impl.hpp>
46 #include <corelib/plugin_manager_store.hpp>
47 
48 #include <sra/data_loaders/csra/csraloader.hpp>
49 #include <sra/data_loaders/csra/impl/csraloader_impl.hpp>
50 
51 BEGIN_NCBI_SCOPE
52 BEGIN_SCOPE(objects)
53 
54 class CDataLoader;
55 
56 BEGIN_LOCAL_NAMESPACE;
57 
58 class CLoaderFilter : public CObjectManager::IDataLoaderFilter {
59 public:
IsDataLoaderMatches(CDataLoader & loader) const60     bool IsDataLoaderMatches(CDataLoader& loader) const {
61         return dynamic_cast<CCSRADataLoader*>(&loader) != 0;
62     }
63 };
64 
65 
66 class CRevoker {
67 public:
~CRevoker()68     ~CRevoker() {
69         CLoaderFilter filter;
70         CObjectManager::GetInstance()->RevokeDataLoaders(filter);
71     }
72 };
// File-local CRevoker wrapped in a CSafeStatic constructed with the life
// span (eLifeLevel_AppMain, eLifeSpan_Long).
// NOTE(review): no code visible in this file accesses s_Revoker; if
// CSafeStatic only creates its object on first access, the revoker may never
// be instantiated -- confirm against CSafeStatic semantics.
static CSafeStatic<CRevoker> s_Revoker(CSafeStaticLifeSpan(
    CSafeStaticLifeSpan::eLifeLevel_AppMain,
    CSafeStaticLifeSpan::eLifeSpan_Long));
76 
77 END_LOCAL_NAMESPACE;
78 
79 
80 /////////////////////////////////////////////////////////////////////////////
81 // CCSRADataLoader params
82 /////////////////////////////////////////////////////////////////////////////
83 
84 
// Parameter ACCESSIONS in section CSRA: a string, empty by default.
// Its value is read by the RegisterInObjectManager() overload that takes no
// loader-specific arguments, which splits it on ',' into the cSRA file list.
NCBI_PARAM_DECL(string, CSRA, ACCESSIONS);
NCBI_PARAM_DEF(string, CSRA, ACCESSIONS, "");
87 
88 
// Parameter PILEUP_GRAPHS in section CSRA_LOADER: bool, default true.
// Defined with the eParam_NoThread flag; CSRA_LOADER_PILEUP_GRAPHS is the
// environment-variable name passed to NCBI_PARAM_DEF_EX.
NCBI_PARAM_DECL(bool, CSRA_LOADER, PILEUP_GRAPHS);
NCBI_PARAM_DEF_EX(bool, CSRA_LOADER, PILEUP_GRAPHS, true,
                  eParam_NoThread, CSRA_LOADER_PILEUP_GRAPHS);
92 
GetPileupGraphsParamDefault(void)93 bool CCSRADataLoader::GetPileupGraphsParamDefault(void)
94 {
95     return NCBI_PARAM_TYPE(CSRA_LOADER, PILEUP_GRAPHS)::GetDefault();
96 }
97 
98 
SetPileupGraphsParamDefault(bool param)99 void CCSRADataLoader::SetPileupGraphsParamDefault(bool param)
100 {
101     NCBI_PARAM_TYPE(CSRA_LOADER, PILEUP_GRAPHS)::SetDefault(param);
102 }
103 
104 
// Parameter QUALITY_GRAPHS in section CSRA_LOADER: bool, default false.
// Defined with the eParam_NoThread flag; CSRA_LOADER_QUALITY_GRAPHS is the
// environment-variable name passed to NCBI_PARAM_DEF_EX.
NCBI_PARAM_DECL(bool, CSRA_LOADER, QUALITY_GRAPHS);
NCBI_PARAM_DEF_EX(bool, CSRA_LOADER, QUALITY_GRAPHS, false,
                  eParam_NoThread, CSRA_LOADER_QUALITY_GRAPHS);
108 
GetQualityGraphsParamDefault(void)109 bool CCSRADataLoader::GetQualityGraphsParamDefault(void)
110 {
111     return NCBI_PARAM_TYPE(CSRA_LOADER, QUALITY_GRAPHS)::GetDefault();
112 }
113 
114 
SetQualityGraphsParamDefault(bool param)115 void CCSRADataLoader::SetQualityGraphsParamDefault(bool param)
116 {
117     return NCBI_PARAM_TYPE(CSRA_LOADER, QUALITY_GRAPHS)::SetDefault(param);
118 }
119 
120 
// Parameter MIN_MAP_QUALITY in section CSRA_LOADER: int, default 0.
// Defined with the eParam_NoThread flag; CSRA_LOADER_MIN_MAP_QUALITY is the
// environment-variable name passed to NCBI_PARAM_DEF_EX.
NCBI_PARAM_DECL(int, CSRA_LOADER, MIN_MAP_QUALITY);
NCBI_PARAM_DEF_EX(int, CSRA_LOADER, MIN_MAP_QUALITY, 0,
                  eParam_NoThread, CSRA_LOADER_MIN_MAP_QUALITY);
124 
GetMinMapQualityParamDefault(void)125 int CCSRADataLoader::GetMinMapQualityParamDefault(void)
126 {
127     return NCBI_PARAM_TYPE(CSRA_LOADER, MIN_MAP_QUALITY)::GetDefault();
128 }
129 
130 
SetMinMapQualityParamDefault(int param)131 void CCSRADataLoader::SetMinMapQualityParamDefault(int param)
132 {
133     return NCBI_PARAM_TYPE(CSRA_LOADER, MIN_MAP_QUALITY)::SetDefault(param);
134 }
135 
136 
// Parameter MAX_SEPARATE_SPOT_GROUPS in section CSRA_LOADER: int, default 0.
// Defined with the eParam_NoThread flag; CSRA_LOADER_MAX_SEPARATE_SPOT_GROUPS
// is the environment-variable name passed to NCBI_PARAM_DEF_EX.
NCBI_PARAM_DECL(int, CSRA_LOADER, MAX_SEPARATE_SPOT_GROUPS);
NCBI_PARAM_DEF_EX(int, CSRA_LOADER, MAX_SEPARATE_SPOT_GROUPS, 0,
                  eParam_NoThread, CSRA_LOADER_MAX_SEPARATE_SPOT_GROUPS);
140 
GetSpotGroupsParamDefault(void)141 int CCSRADataLoader::GetSpotGroupsParamDefault(void)
142 {
143     return NCBI_PARAM_TYPE(CSRA_LOADER, MAX_SEPARATE_SPOT_GROUPS)::GetDefault();
144 }
145 
146 
SetSpotGroupsParamDefault(int param)147 void CCSRADataLoader::SetSpotGroupsParamDefault(int param)
148 {
149     return NCBI_PARAM_TYPE(CSRA_LOADER, MAX_SEPARATE_SPOT_GROUPS)::SetDefault(param);
150 }
151 
152 
// Parameter SPOT_READ_ALIGN in section CSRA_LOADER: bool, default false.
NCBI_PARAM_DECL(bool, CSRA_LOADER, SPOT_READ_ALIGN);
NCBI_PARAM_DEF(bool, CSRA_LOADER, SPOT_READ_ALIGN, false);
155 
GetSpotReadAlignParamDefault(void)156 bool CCSRADataLoader::GetSpotReadAlignParamDefault(void)
157 {
158     return NCBI_PARAM_TYPE(CSRA_LOADER, SPOT_READ_ALIGN)::GetDefault();
159 }
160 
161 
SetSpotReadAlignParamDefault(bool param)162 void CCSRADataLoader::SetSpotReadAlignParamDefault(bool param)
163 {
164     return NCBI_PARAM_TYPE(CSRA_LOADER, SPOT_READ_ALIGN)::SetDefault(param);
165 }
166 
167 
GetSpotReadAlign() const168 bool CCSRADataLoader::GetSpotReadAlign() const
169 {
170     return m_Impl->GetSpotReadAlign();
171 }
172 
173 
SetSpotReadAlign(bool value)174 void CCSRADataLoader::SetSpotReadAlign(bool value)
175 {
176     m_Impl->SetSpotReadAlign(value);
177 }
178 
179 
GetEffectiveMinMapQuality(void) const180 int CCSRADataLoader::SLoaderParams::GetEffectiveMinMapQuality(void) const
181 {
182     return m_MinMapQuality != kMinMapQuality_config?
183         m_MinMapQuality: CCSRADataLoader::GetMinMapQualityParamDefault();
184 }
185 
186 
GetEffectivePileupGraphs(void) const187 bool CCSRADataLoader::SLoaderParams::GetEffectivePileupGraphs(void) const
188 {
189     return m_PileupGraphs != kPileupGraphs_config?
190         m_PileupGraphs != 0: CCSRADataLoader::GetPileupGraphsParamDefault();
191 }
192 
193 
GetEffectiveQualityGraphs(void) const194 bool CCSRADataLoader::SLoaderParams::GetEffectiveQualityGraphs(void) const
195 {
196     return m_QualityGraphs != kQualityGraphs_config?
197         m_QualityGraphs != 0: CCSRADataLoader::GetQualityGraphsParamDefault();
198 }
199 
200 
GetEffectiveSpotReadAlign(void) const201 bool CCSRADataLoader::SLoaderParams::GetEffectiveSpotReadAlign(void) const
202 {
203     return m_SpotReadAlign != kSpotReadAlign_config?
204         m_SpotReadAlign != 0: CCSRADataLoader::GetSpotReadAlignParamDefault();
205 }
206 
207 
GetEffectiveSpotGroups(void) const208 int CCSRADataLoader::SLoaderParams::GetEffectiveSpotGroups(void) const
209 {
210     return m_SpotGroups != kSpotGroups_config?
211         m_SpotGroups != 0: CCSRADataLoader::GetSpotGroupsParamDefault();
212 }
213 
214 
GetLoaderName(void) const215 string CCSRADataLoader::SLoaderParams::GetLoaderName(void) const
216 {
217     CNcbiOstrstream str;
218     str << "CCSRADataLoader:" << m_DirPath;
219     if ( !m_CSRAFiles.empty() ) {
220         str << "/files=";
221         ITERATE ( vector<string>, it, m_CSRAFiles ) {
222             str << "+" << *it;
223         }
224     }
225     if ( m_IdMapper ) {
226         str << "/mapper=" << m_IdMapper.get();
227     }
228     if ( !m_AnnotName.empty() ) {
229         str << "/name=" << m_AnnotName;
230     }
231     if ( m_MinMapQuality != kMinMapQuality_config ) {
232         str << "/q=" << m_MinMapQuality;
233     }
234     if ( m_PileupGraphs != kPileupGraphs_config ) {
235         str << "/pileup_graphs=" << m_PileupGraphs;
236     }
237     if ( m_QualityGraphs != kQualityGraphs_config ) {
238         str << "/quality_graphs=" << m_QualityGraphs;
239     }
240     if ( m_SpotGroups != kSpotGroups_config ) {
241         str << "/spot_groups=" << m_SpotGroups;
242     }
243     if ( m_PathInId != kPathInId_config ) {
244         str << "/path_in_id=" << m_PathInId;
245     }
246     return CNcbiOstrstreamToString(str);
247 }
248 
249 
250 /////////////////////////////////////////////////////////////////////////////
251 // CCSRADataLoader
252 /////////////////////////////////////////////////////////////////////////////
253 
RegisterInObjectManager(CObjectManager & om,const SLoaderParams & params,CObjectManager::EIsDefault is_default,CObjectManager::TPriority priority)254 CCSRADataLoader::TRegisterLoaderInfo CCSRADataLoader::RegisterInObjectManager(
255     CObjectManager& om,
256     const SLoaderParams& params,
257     CObjectManager::EIsDefault is_default,
258     CObjectManager::TPriority priority)
259 {
260     TMaker maker(params);
261     CDataLoader::RegisterInObjectManager(om, maker, is_default, priority);
262     return maker.GetRegisterInfo();
263 }
264 
265 
RegisterInObjectManager(CObjectManager & om,CObjectManager::EIsDefault is_default,CObjectManager::TPriority priority)266 CCSRADataLoader::TRegisterLoaderInfo CCSRADataLoader::RegisterInObjectManager(
267     CObjectManager& om,
268     CObjectManager::EIsDefault is_default,
269     CObjectManager::TPriority priority)
270 {
271     SLoaderParams params;
272     NStr::Split(NCBI_PARAM_TYPE(CSRA, ACCESSIONS)::GetDefault(), ",",
273                    params.m_CSRAFiles);
274     TMaker maker(params);
275     CDataLoader::RegisterInObjectManager(om, maker, is_default, priority);
276     return maker.GetRegisterInfo();
277 }
278 
279 
RegisterInObjectManager(CObjectManager & om,const string & srz_acc,CObjectManager::EIsDefault is_default,CObjectManager::TPriority priority)280 CCSRADataLoader::TRegisterLoaderInfo CCSRADataLoader::RegisterInObjectManager(
281     CObjectManager& om,
282     const string& srz_acc,
283     CObjectManager::EIsDefault is_default,
284     CObjectManager::TPriority priority)
285 {
286     SLoaderParams params;
287     params.m_DirPath = srz_acc;
288     TMaker maker(params);
289     CDataLoader::RegisterInObjectManager(om, maker, is_default, priority);
290     return maker.GetRegisterInfo();
291 }
292 
293 
RegisterInObjectManager(CObjectManager & om,const string & dir_path,const string & csra_name,CObjectManager::EIsDefault is_default,CObjectManager::TPriority priority)294 CCSRADataLoader::TRegisterLoaderInfo CCSRADataLoader::RegisterInObjectManager(
295     CObjectManager& om,
296     const string& dir_path,
297     const string& csra_name,
298     CObjectManager::EIsDefault is_default,
299     CObjectManager::TPriority priority)
300 {
301     SLoaderParams params;
302     params.m_DirPath = dir_path;
303     params.m_CSRAFiles.push_back(csra_name);
304     TMaker maker(params);
305     CDataLoader::RegisterInObjectManager(om, maker, is_default, priority);
306     return maker.GetRegisterInfo();
307 }
308 
309 
RegisterInObjectManager(CObjectManager & om,const string & dir_path,const vector<string> & csra_files,CObjectManager::EIsDefault is_default,CObjectManager::TPriority priority)310 CCSRADataLoader::TRegisterLoaderInfo CCSRADataLoader::RegisterInObjectManager(
311     CObjectManager& om,
312     const string& dir_path,
313     const vector<string>& csra_files,
314     CObjectManager::EIsDefault is_default,
315     CObjectManager::TPriority priority)
316 {
317     SLoaderParams params;
318     params.m_DirPath = dir_path;
319     params.m_CSRAFiles = csra_files;
320     TMaker maker(params);
321     CDataLoader::RegisterInObjectManager(om, maker, is_default, priority);
322     return maker.GetRegisterInfo();
323 }
324 
325 
GetLoaderNameFromArgs(void)326 string CCSRADataLoader::GetLoaderNameFromArgs(void)
327 {
328     return "CCSRADataLoader";
329 }
330 
331 
GetLoaderNameFromArgs(const SLoaderParams & params)332 string CCSRADataLoader::GetLoaderNameFromArgs(const SLoaderParams& params)
333 {
334     return params.GetLoaderName();
335 }
336 
337 
// Loader name for 'srz_acc', which is stored as the directory path of
// otherwise default loader parameters.
string CCSRADataLoader::GetLoaderNameFromArgs(const string& srz_acc)
{
    SLoaderParams loader_params;
    loader_params.m_DirPath = srz_acc;
    // Same result as going through the SLoaderParams overload, which simply
    // returns params.GetLoaderName().
    return loader_params.GetLoaderName();
}
344 
345 
// Loader name for the single file 'csra_name' located under 'dir_path'.
string CCSRADataLoader::GetLoaderNameFromArgs(const string& dir_path,
                                              const string& csra_name)
{
    SLoaderParams loader_params;
    loader_params.m_DirPath = dir_path;
    loader_params.m_CSRAFiles.push_back(csra_name);
    // Same result as going through the SLoaderParams overload, which simply
    // returns params.GetLoaderName().
    return loader_params.GetLoaderName();
}
354 
355 
// Loader name for the files 'csra_files' located under 'dir_path'.
string CCSRADataLoader::GetLoaderNameFromArgs(
    const string&         dir_path,
    const vector<string>& csra_files)
{
    SLoaderParams loader_params;
    loader_params.m_DirPath = dir_path;
    loader_params.m_CSRAFiles = csra_files;
    // Same result as going through the SLoaderParams overload, which simply
    // returns params.GetLoaderName().
    return loader_params.GetLoaderName();
}
365 
366 
CCSRADataLoader(const string & loader_name,const SLoaderParams & params)367 CCSRADataLoader::CCSRADataLoader(const string& loader_name,
368                                  const SLoaderParams& params)
369     : CDataLoader(loader_name)
370 {
371     string dir_path = params.m_DirPath;
372 /*
373     if ( dir_path.empty() ) {
374         dir_path = NCBI_PARAM_TYPE(CSRA, DIR_PATH)::GetDefault();
375     }
376 */
377     m_Impl.Reset(new CCSRADataLoader_Impl(params));
378 }
379 
380 
~CCSRADataLoader(void)381 CCSRADataLoader::~CCSRADataLoader(void)
382 {
383 }
384 
385 
GetBlobId(const CSeq_id_Handle & idh)386 CDataLoader::TBlobId CCSRADataLoader::GetBlobId(const CSeq_id_Handle& idh)
387 {
388     return TBlobId(m_Impl->GetBlobId(idh).GetPointerOrNull());
389 }
390 
391 
392 CDataLoader::TBlobId
GetBlobIdFromString(const string & str) const393 CCSRADataLoader::GetBlobIdFromString(const string& str) const
394 {
395     return TBlobId(new CCSRABlobId(str));
396 }
397 
398 
CanGetBlobById(void) const399 bool CCSRADataLoader::CanGetBlobById(void) const
400 {
401     return true;
402 }
403 
404 
405 CDataLoader::TTSE_LockSet
GetRecords(const CSeq_id_Handle & idh,EChoice choice)406 CCSRADataLoader::GetRecords(const CSeq_id_Handle& idh,
407                            EChoice choice)
408 {
409     return m_Impl->GetRecords(GetDataSource(), idh, choice);
410 }
411 
412 
GetChunk(TChunk chunk)413 void CCSRADataLoader::GetChunk(TChunk chunk)
414 {
415     TBlobId blob_id = chunk->GetBlobId();
416     const CCSRABlobId& csra_id = dynamic_cast<const CCSRABlobId&>(*blob_id);
417     m_Impl->LoadChunk(csra_id, *chunk);
418 }
419 
420 
GetChunks(const TChunkSet & chunks)421 void CCSRADataLoader::GetChunks(const TChunkSet& chunks)
422 {
423     ITERATE ( TChunkSet, it, chunks ) {
424         GetChunk(*it);
425     }
426 }
427 
428 
429 CDataLoader::TTSE_Lock
GetBlobById(const TBlobId & blob_id)430 CCSRADataLoader::GetBlobById(const TBlobId& blob_id)
431 {
432     return m_Impl->GetBlobById(GetDataSource(),
433                                dynamic_cast<const CCSRABlobId&>(*blob_id));
434 }
435 
436 
GetPossibleAnnotNames(void) const437 CCSRADataLoader::TAnnotNames CCSRADataLoader::GetPossibleAnnotNames(void) const
438 {
439     return m_Impl->GetPossibleAnnotNames();
440 }
441 
442 
GetIds(const CSeq_id_Handle & idh,TIds & ids)443 void CCSRADataLoader::GetIds(const CSeq_id_Handle& idh, TIds& ids)
444 {
445     m_Impl->GetIds(idh, ids);
446 }
447 
448 
449 CDataLoader::SAccVerFound
GetAccVerFound(const CSeq_id_Handle & idh)450 CCSRADataLoader::GetAccVerFound(const CSeq_id_Handle& idh)
451 {
452     return m_Impl->GetAccVer(idh);
453 }
454 
455 
456 CDataLoader::SGiFound
GetGiFound(const CSeq_id_Handle & idh)457 CCSRADataLoader::GetGiFound(const CSeq_id_Handle& idh)
458 {
459     return m_Impl->GetGi(idh);
460 }
461 
462 
GetLabel(const CSeq_id_Handle & idh)463 string CCSRADataLoader::GetLabel(const CSeq_id_Handle& idh)
464 {
465     return m_Impl->GetLabel(idh);
466 }
467 
468 
GetTaxId(const CSeq_id_Handle & idh)469 TTaxId CCSRADataLoader::GetTaxId(const CSeq_id_Handle& idh)
470 {
471     return m_Impl->GetTaxId(idh);
472 }
473 
474 
GetSequenceLength(const CSeq_id_Handle & idh)475 TSeqPos CCSRADataLoader::GetSequenceLength(const CSeq_id_Handle& idh)
476 {
477     return m_Impl->GetSequenceLength(idh);
478 }
479 
480 
481 CDataLoader::STypeFound
GetSequenceTypeFound(const CSeq_id_Handle & idh)482 CCSRADataLoader::GetSequenceTypeFound(const CSeq_id_Handle& idh)
483 {
484     return m_Impl->GetSequenceType(idh);
485 }
486 
487 
GetDefaultPriority(void) const488 CObjectManager::TPriority CCSRADataLoader::GetDefaultPriority(void) const
489 {
490     return CObjectManager::kPriority_Replace;
491 }
492 
493 
GetDefaultBlobCacheSizeLimit() const494 unsigned CCSRADataLoader::GetDefaultBlobCacheSizeLimit() const
495 {
496     // do not cache released BAM TSEs
497     return 0;
498 }
499 
500 
501 END_SCOPE(objects)
502 
503 // ===========================================================================
504 
505 USING_SCOPE(objects);
506 
DataLoaders_Register_CSRA(void)507 void DataLoaders_Register_CSRA(void)
508 {
509     RegisterEntryPoint<CDataLoader>(NCBI_EntryPoint_DataLoader_CSRA);
510 }
511 
512 
// Driver name ("csra") that CCSRA_DataLoaderCF passes to the
// CDataLoaderFactory constructor.
const char kDataLoader_CSRA_DriverName[] = "csra";
514 
515 class CCSRA_DataLoaderCF : public CDataLoaderFactory
516 {
517 public:
CCSRA_DataLoaderCF(void)518     CCSRA_DataLoaderCF(void)
519         : CDataLoaderFactory(kDataLoader_CSRA_DriverName) {}
~CCSRA_DataLoaderCF(void)520     virtual ~CCSRA_DataLoaderCF(void) {}
521 
522 protected:
523     virtual CDataLoader* CreateAndRegister(
524         CObjectManager& om,
525         const TPluginManagerParamTree* params) const;
526 };
527 
528 
CreateAndRegister(CObjectManager & om,const TPluginManagerParamTree * params) const529 CDataLoader* CCSRA_DataLoaderCF::CreateAndRegister(
530     CObjectManager& om,
531     const TPluginManagerParamTree* params) const
532 {
533     if ( !ValidParams(params) ) {
534         // Use constructor without arguments
535         return CCSRADataLoader::RegisterInObjectManager(om).GetLoader();
536     }
537     // IsDefault and Priority arguments may be specified
538     return CCSRADataLoader::RegisterInObjectManager(
539         om,
540         GetIsDefault(params),
541         GetPriority(params)).GetLoader();
542 }
543 
544 
NCBI_EntryPoint_DataLoader_CSRA(CPluginManager<CDataLoader>::TDriverInfoList & info_list,CPluginManager<CDataLoader>::EEntryPointRequest method)545 void NCBI_EntryPoint_DataLoader_CSRA(
546     CPluginManager<CDataLoader>::TDriverInfoList&   info_list,
547     CPluginManager<CDataLoader>::EEntryPointRequest method)
548 {
549     CHostEntryPointImpl<CCSRA_DataLoaderCF>::NCBI_EntryPointImpl(info_list, method);
550 }
551 
552 
NCBI_EntryPoint_xloader_csra(CPluginManager<objects::CDataLoader>::TDriverInfoList & info_list,CPluginManager<objects::CDataLoader>::EEntryPointRequest method)553 void NCBI_EntryPoint_xloader_csra(
554     CPluginManager<objects::CDataLoader>::TDriverInfoList&   info_list,
555     CPluginManager<objects::CDataLoader>::EEntryPointRequest method)
556 {
557     NCBI_EntryPoint_DataLoader_CSRA(info_list, method);
558 }
559 
560 
561 END_NCBI_SCOPE
562