1 /* $Id: sraloader.cpp 592089 2019-08-26 18:56:36Z grichenk $
2 * ===========================================================================
3 *
4 * PUBLIC DOMAIN NOTICE
5 * National Center for Biotechnology Information
6 *
7 * This software/database is a "United States Government Work" under the
8 * terms of the United States Copyright Act. It was written as part of
9 * the author's official duties as a United States Government employee and
10 * thus cannot be copyrighted. This software/database is freely available
11 * to the public for use. The National Library of Medicine and the U.S.
12 * Government have not placed any restriction on its use or reproduction.
13 *
14 * Although all reasonable efforts have been taken to ensure the accuracy
15 * and reliability of the software and data, the NLM and the U.S.
16 * Government do not and cannot warrant the performance or results that
17 * may be obtained by using this software or data. The NLM and the U.S.
18 * Government disclaim all warranties, express or implied, including
19 * warranties of performance, merchantability or fitness for any particular
20 * purpose.
21 *
22 * Please cite the author in any work or product based on this material.
23 *
24 * ===========================================================================
25 *
26 * Author: Eugene Vasilchenko
27 *
28 * File Description: SRA file data loader
29 *
30 * ===========================================================================
31 */
32
33 #include <ncbi_pch.hpp>
34 #include <corelib/ncbistd.hpp>
35
36 #include <objects/general/general__.hpp>
37 #include <objects/seqloc/Seq_id.hpp>
38 #include <objects/seq/seq__.hpp>
39 #include <objects/seqres/seqres__.hpp>
40
41 #include <objmgr/impl/data_source.hpp>
42 #include <objmgr/impl/tse_loadlock.hpp>
43 #include <objmgr/data_loader_factory.hpp>
44 #include <corelib/plugin_manager_impl.hpp>
45 #include <corelib/plugin_manager_store.hpp>
46
47 #include <sra/data_loaders/sra/sraloader.hpp>
48 #include <sra/data_loaders/sra/impl/sraloader_impl.hpp>
49 #include <sra/readers/ncbi_traces_path.hpp>
50
51 BEGIN_NCBI_SCOPE
52
53 class CObject;
54
55 NCBI_PARAM_DECL(bool, SRA_LOADER, TRIM);
56 NCBI_PARAM_DEF_EX(bool, SRA_LOADER, TRIM, false,
57 eParam_NoThread, SRA_LOADER_TRIM);
58
59 BEGIN_SCOPE(objects)
60
61 class CDataLoader;
62
63 BEGIN_LOCAL_NAMESPACE;
64
65 class CLoaderFilter : public CObjectManager::IDataLoaderFilter {
66 public:
IsDataLoaderMatches(CDataLoader & loader) const67 bool IsDataLoaderMatches(CDataLoader& loader) const {
68 return dynamic_cast<CSRADataLoader*>(&loader) != 0;
69 }
70 };
71
72
73 class CRevoker {
74 public:
~CRevoker()75 ~CRevoker() {
76 CLoaderFilter filter;
77 CObjectManager::GetInstance()->RevokeDataLoaders(filter);
78 }
79 };
80 static CSafeStatic<CRevoker> s_Revoker(CSafeStaticLifeSpan(
81 CSafeStaticLifeSpan::eLifeLevel_AppMain,
82 CSafeStaticLifeSpan::eLifeSpan_Long));
83
84 END_LOCAL_NAMESPACE;
85
86
87 /////////////////////////////////////////////////////////////////////////////
88 // CSRABlobId
89 /////////////////////////////////////////////////////////////////////////////
90
91 class CSRABlobId : public CBlobId
92 {
93 public:
94 CSRABlobId(const string& acc, unsigned spot_id);
95 ~CSRABlobId(void);
96
97 string m_Accession;
98 unsigned m_SpotId;
99
100 string ToString(void) const;
101 bool operator<(const CBlobId& id) const;
102 bool operator==(const CBlobId& id) const;
103 };
104
105
CSRABlobId(const string & acc,unsigned spot_id)106 CSRABlobId::CSRABlobId(const string& acc, unsigned spot_id)
107 : m_Accession(acc), m_SpotId(spot_id)
108 {
109 }
110
111
~CSRABlobId(void)112 CSRABlobId::~CSRABlobId(void)
113 {
114 }
115
116
ToString(void) const117 string CSRABlobId::ToString(void) const
118 {
119 CNcbiOstrstream out;
120 out << m_Accession << '.' << m_SpotId;
121 return CNcbiOstrstreamToString(out);
122 }
123
124
operator <(const CBlobId & id) const125 bool CSRABlobId::operator<(const CBlobId& id) const
126 {
127 const CSRABlobId& sra2 = dynamic_cast<const CSRABlobId&>(id);
128 return m_Accession < sra2.m_Accession ||
129 (m_Accession == sra2.m_Accession && m_SpotId < sra2.m_SpotId);
130 }
131
132
operator ==(const CBlobId & id) const133 bool CSRABlobId::operator==(const CBlobId& id) const
134 {
135 const CSRABlobId& sra2 = dynamic_cast<const CSRABlobId&>(id);
136 return m_Accession == sra2.m_Accession && m_SpotId == sra2.m_SpotId;
137 }
138
139
140 /////////////////////////////////////////////////////////////////////////////
141 // CSRADataLoader_Impl
142 /////////////////////////////////////////////////////////////////////////////
143
144
GetTrimParam(void)145 static bool GetTrimParam(void)
146 {
147 static NCBI_PARAM_TYPE(SRA_LOADER, TRIM) s_Value;
148 return s_Value.Get();
149 }
150
151
CSRADataLoader_Impl(CSraMgr::ETrim trim)152 CSRADataLoader_Impl::CSRADataLoader_Impl(CSraMgr::ETrim trim)
153 : m_Mgr(trim)
154 {
155 }
156
157
~CSRADataLoader_Impl(void)158 CSRADataLoader_Impl::~CSRADataLoader_Impl(void)
159 {
160 }
161
162
LoadSRAEntry(const string & accession,unsigned spot_id)163 CRef<CSeq_entry> CSRADataLoader_Impl::LoadSRAEntry(const string& accession,
164 unsigned spot_id)
165 {
166 CMutexGuard LOCK(m_Mutex);
167 if ( m_Run.GetAccession() != accession ) {
168 m_Run.Init(m_Mgr, accession);
169 }
170 return m_Run.GetSpotEntry(spot_id);
171 }
172
173
GetSequenceType(const string & accession,unsigned spot_id,unsigned read_id)174 CSeq_inst::TMol CSRADataLoader_Impl::GetSequenceType(const string& accession,
175 unsigned spot_id,
176 unsigned read_id)
177 {
178 CMutexGuard LOCK(m_Mutex);
179 if ( m_Run.GetAccession() != accession ) {
180 m_Run.Init(m_Mgr, accession);
181 }
182 return m_Run.GetSequenceType(spot_id, read_id);
183 }
184
185
GetSequenceLength(const string & accession,unsigned spot_id,unsigned read_id)186 TSeqPos CSRADataLoader_Impl::GetSequenceLength(const string& accession,
187 unsigned spot_id,
188 unsigned read_id)
189 {
190 CMutexGuard LOCK(m_Mutex);
191 if ( m_Run.GetAccession() != accession ) {
192 m_Run.Init(m_Mgr, accession);
193 }
194 return m_Run.GetSequenceLength(spot_id, read_id);
195 }
196
197
198 /////////////////////////////////////////////////////////////////////////////
199 // CSRADataLoader
200 /////////////////////////////////////////////////////////////////////////////
201
SLoaderParams(void)202 CSRADataLoader::SLoaderParams::SLoaderParams(void)
203 : m_Trim(GetTrimParam())
204 {
205 }
206
207
SLoaderParams(bool trim)208 CSRADataLoader::SLoaderParams::SLoaderParams(bool trim)
209 : m_Trim(trim)
210 {
211 }
212
213
~SLoaderParams(void)214 CSRADataLoader::SLoaderParams::~SLoaderParams(void)
215 {
216 }
217
218
RegisterInObjectManager(CObjectManager & om,CObjectManager::EIsDefault is_default,CObjectManager::TPriority priority)219 CSRADataLoader::TRegisterLoaderInfo CSRADataLoader::RegisterInObjectManager(
220 CObjectManager& om,
221 CObjectManager::EIsDefault is_default,
222 CObjectManager::TPriority priority)
223 {
224 SLoaderParams params;
225 TMaker maker(params);
226 CDataLoader::RegisterInObjectManager(om, maker, is_default, priority);
227 return maker.GetRegisterInfo();
228 }
229
230
RegisterInObjectManager(CObjectManager & om,const string & rep_path,const string & vol_path,CObjectManager::EIsDefault is_default,CObjectManager::TPriority priority)231 CSRADataLoader::TRegisterLoaderInfo CSRADataLoader::RegisterInObjectManager(
232 CObjectManager& om,
233 const string& rep_path,
234 const string& vol_path,
235 CObjectManager::EIsDefault is_default,
236 CObjectManager::TPriority priority)
237 {
238 SLoaderParams params;
239 params.m_RepPath = rep_path;
240 params.m_VolPath = vol_path;
241 TMaker maker(params);
242 CDataLoader::RegisterInObjectManager(om, maker, is_default, priority);
243 return maker.GetRegisterInfo();
244 }
245
246
RegisterInObjectManager(CObjectManager & om,ETrim trim,CObjectManager::EIsDefault is_default,CObjectManager::TPriority priority)247 CSRADataLoader::TRegisterLoaderInfo CSRADataLoader::RegisterInObjectManager(
248 CObjectManager& om,
249 ETrim trim,
250 CObjectManager::EIsDefault is_default,
251 CObjectManager::TPriority priority)
252 {
253 SLoaderParams params(trim == eTrim);
254 TMaker maker(params);
255 CDataLoader::RegisterInObjectManager(om, maker, is_default, priority);
256 return maker.GetRegisterInfo();
257 }
258
259
RegisterInObjectManager(CObjectManager & om,const string & rep_path,const string & vol_path,ETrim trim,CObjectManager::EIsDefault is_default,CObjectManager::TPriority priority)260 CSRADataLoader::TRegisterLoaderInfo CSRADataLoader::RegisterInObjectManager(
261 CObjectManager& om,
262 const string& rep_path,
263 const string& vol_path,
264 ETrim trim,
265 CObjectManager::EIsDefault is_default,
266 CObjectManager::TPriority priority)
267 {
268 SLoaderParams params(trim == eTrim);
269 params.m_RepPath = rep_path;
270 params.m_VolPath = vol_path;
271 TMaker maker(params);
272 CDataLoader::RegisterInObjectManager(om, maker, is_default, priority);
273 return maker.GetRegisterInfo();
274 }
275
276
GetLoaderNameFromArgs(const SLoaderParams & params)277 string CSRADataLoader::GetLoaderNameFromArgs(const SLoaderParams& params)
278 {
279 string ret = "SRADataLoader";
280 if ( params.m_Trim ) {
281 ret += "Trim";
282 }
283 if ( !params.m_RepPath.empty() || !params.m_VolPath.empty() ) {
284 ret += ":";
285 ret += params.m_RepPath;
286 ret += ":";
287 ret += params.m_VolPath;
288 }
289 return ret;
290 }
291
292
GetLoaderNameFromArgs(void)293 string CSRADataLoader::GetLoaderNameFromArgs(void)
294 {
295 SLoaderParams params;
296 return GetLoaderNameFromArgs(params);
297 }
298
299
// Loader name for default trim setting and the given repository/volume paths.
string CSRADataLoader::GetLoaderNameFromArgs(const string& rep_path,
                                             const string& vol_path)
{
    SLoaderParams loader_params;
    loader_params.m_RepPath = rep_path;
    loader_params.m_VolPath = vol_path;
    return GetLoaderNameFromArgs(loader_params);
}
308
309
GetLoaderNameFromArgs(ETrim trim)310 string CSRADataLoader::GetLoaderNameFromArgs(ETrim trim)
311 {
312 SLoaderParams params(trim == eTrim);
313 return GetLoaderNameFromArgs(params);
314 }
315
316
// Loader name for explicit repository/volume paths and an explicit trim mode
// (trimming on iff `trim` is eTrim).
string CSRADataLoader::GetLoaderNameFromArgs(const string& rep_path,
                                             const string& vol_path,
                                             ETrim trim)
{
    const bool do_trim = (trim == eTrim);
    SLoaderParams loader_params(do_trim);
    loader_params.m_RepPath = rep_path;
    loader_params.m_VolPath = vol_path;
    return GetLoaderNameFromArgs(loader_params);
}
326
327
CSRADataLoader(const string & loader_name,const SLoaderParams & params)328 CSRADataLoader::CSRADataLoader(const string& loader_name,
329 const SLoaderParams& params)
330 : CDataLoader(loader_name)
331 {
332 CSraMgr::ETrim trim = params.m_Trim? CSraMgr::eTrim: CSraMgr::eNoTrim;
333 m_Impl = new CSRADataLoader_Impl(trim);
334 }
335
336
~CSRADataLoader(void)337 CSRADataLoader::~CSRADataLoader(void)
338 {
339 }
340
341
342 typedef pair<CRef<CSRABlobId>, unsigned> TReadId;
343
sx_GetReadId(const string & sra,bool with_chunk)344 static TReadId sx_GetReadId(const string& sra, bool with_chunk)
345 {
346 SIZE_TYPE dot1 = sra.find('.');
347 if ( dot1 == NPOS ) {
348 return TReadId();
349 }
350 SIZE_TYPE dot2 = with_chunk? sra.find('.', dot1+1): sra.size();
351 if ( dot2 == NPOS || dot1+1 >= dot2 || sra[dot1+1] == '0' ||
352 (with_chunk && (dot2+2 != sra.size() ||
353 (sra[dot2+1] != '2' && sra[dot2+1] != '4') )) ) {
354 return TReadId();
355 }
356 unsigned spot_id =
357 NStr::StringToUInt(CTempString(sra.data()+dot1+1, dot2-dot1-1));
358 TReadId ret;
359 ret.first = new CSRABlobId(sra.substr(0, dot1), spot_id);
360 ret.second = sra[dot2+1] - '0';
361 return ret;
362 }
363
364
sx_GetReadId(const CSeq_id_Handle & idh)365 static TReadId sx_GetReadId(const CSeq_id_Handle& idh)
366 {
367 if ( idh.Which() != CSeq_id::e_General ) {
368 return TReadId();
369 }
370 CConstRef<CSeq_id> id = idh.GetSeqId();
371 const CDbtag& general = id->GetGeneral();
372 if ( general.GetDb() != "SRA") {
373 return TReadId();
374 }
375 return sx_GetReadId(general.GetTag().GetStr(), true);
376 }
377
378
GetBlobId(const CSeq_id_Handle & idh)379 CDataLoader::TBlobId CSRADataLoader::GetBlobId(const CSeq_id_Handle& idh)
380 {
381 return TBlobId(sx_GetReadId(idh).first);
382 }
383
384
385 CDataLoader::TBlobId
GetBlobIdFromString(const string & str) const386 CSRADataLoader::GetBlobIdFromString(const string& str) const
387 {
388 return TBlobId(sx_GetReadId(str, false).first);
389 }
390
391
CanGetBlobById(void) const392 bool CSRADataLoader::CanGetBlobById(void) const
393 {
394 return true;
395 }
396
397
398 CDataLoader::TTSE_LockSet
GetRecords(const CSeq_id_Handle & idh,EChoice)399 CSRADataLoader::GetRecords(const CSeq_id_Handle& idh,
400 EChoice /* choice */)
401 {
402 TTSE_LockSet locks;
403 TBlobId blob_id = GetBlobId(idh);
404 if ( blob_id ) {
405 locks.insert(GetBlobById(blob_id));
406 }
407 return locks;
408 }
409
410
411 CDataLoader::TTSE_Lock
GetBlobById(const TBlobId & blob_id)412 CSRADataLoader::GetBlobById(const TBlobId& blob_id)
413 {
414 CTSE_LoadLock load_lock = GetDataSource()->GetTSE_LoadLock(blob_id);
415 if ( !load_lock.IsLoaded() ) {
416 const CSRABlobId& sra_id = dynamic_cast<const CSRABlobId&>(*blob_id);
417 CRef<CSeq_entry> entry =
418 m_Impl->LoadSRAEntry(sra_id.m_Accession, sra_id.m_SpotId);
419 if ( entry ) {
420 load_lock->SetSeq_entry(*entry);
421 }
422 load_lock.SetLoaded();
423 }
424 return load_lock;
425 }
426
427
GetSequenceLength(const CSeq_id_Handle & idh)428 TSeqPos CSRADataLoader::GetSequenceLength(const CSeq_id_Handle& idh)
429 {
430 TReadId read_id = sx_GetReadId(idh);
431 if ( read_id.first ) {
432 const CSRABlobId& sra_id = *read_id.first;
433 return m_Impl->GetSequenceLength(sra_id.m_Accession,
434 sra_id.m_SpotId,
435 read_id.second);
436 }
437 return kInvalidSeqPos;
438 }
439
440
441 CDataLoader::STypeFound
GetSequenceTypeFound(const CSeq_id_Handle & idh)442 CSRADataLoader::GetSequenceTypeFound(const CSeq_id_Handle& idh)
443 {
444 STypeFound ret;
445 TReadId read_id = sx_GetReadId(idh);
446 if ( read_id.first ) {
447 const CSRABlobId& sra_id = *read_id.first;
448 ret.sequence_found = true;
449 ret.type = m_Impl->GetSequenceType(sra_id.m_Accession,
450 sra_id.m_SpotId,
451 read_id.second);
452 }
453 return ret;
454 }
455
456
GetDefaultPriority(void) const457 CObjectManager::TPriority CSRADataLoader::GetDefaultPriority(void) const
458 {
459 return CObjectManager::kPriority_Replace;
460 }
461
462
463 END_SCOPE(objects)
464
465 // ===========================================================================
466
467 USING_SCOPE(objects);
468
DataLoaders_Register_SRA(void)469 void DataLoaders_Register_SRA(void)
470 {
471 RegisterEntryPoint<CDataLoader>(NCBI_EntryPoint_DataLoader_Sra);
472 }
473
474
475 const string kDataLoader_Sra_DriverName("sra");
476
477 class CSRA_DataLoaderCF : public CDataLoaderFactory
478 {
479 public:
CSRA_DataLoaderCF(void)480 CSRA_DataLoaderCF(void)
481 : CDataLoaderFactory(kDataLoader_Sra_DriverName) {}
~CSRA_DataLoaderCF(void)482 virtual ~CSRA_DataLoaderCF(void) {}
483
484 protected:
485 virtual CDataLoader* CreateAndRegister(
486 CObjectManager& om,
487 const TPluginManagerParamTree* params) const;
488 };
489
490
CreateAndRegister(CObjectManager & om,const TPluginManagerParamTree * params) const491 CDataLoader* CSRA_DataLoaderCF::CreateAndRegister(
492 CObjectManager& om,
493 const TPluginManagerParamTree* params) const
494 {
495 if ( !ValidParams(params) ) {
496 // Use constructor without arguments
497 return CSRADataLoader::RegisterInObjectManager(om).GetLoader();
498 }
499 // IsDefault and Priority arguments may be specified
500 return CSRADataLoader::RegisterInObjectManager(
501 om,
502 GetIsDefault(params),
503 GetPriority(params)).GetLoader();
504 }
505
506
NCBI_EntryPoint_DataLoader_Sra(CPluginManager<CDataLoader>::TDriverInfoList & info_list,CPluginManager<CDataLoader>::EEntryPointRequest method)507 void NCBI_EntryPoint_DataLoader_Sra(
508 CPluginManager<CDataLoader>::TDriverInfoList& info_list,
509 CPluginManager<CDataLoader>::EEntryPointRequest method)
510 {
511 CHostEntryPointImpl<CSRA_DataLoaderCF>::NCBI_EntryPointImpl(info_list, method);
512 }
513
514
NCBI_EntryPoint_xloader_sra(CPluginManager<objects::CDataLoader>::TDriverInfoList & info_list,CPluginManager<objects::CDataLoader>::EEntryPointRequest method)515 void NCBI_EntryPoint_xloader_sra(
516 CPluginManager<objects::CDataLoader>::TDriverInfoList& info_list,
517 CPluginManager<objects::CDataLoader>::EEntryPointRequest method)
518 {
519 NCBI_EntryPoint_DataLoader_Sra(info_list, method);
520 }
521
522
523 END_NCBI_SCOPE
524