1 /* $Id: psg_loader.cpp 624399 2021-01-28 20:09:32Z ivanov $
2 * ===========================================================================
3 *
4 * PUBLIC DOMAIN NOTICE
5 * National Center for Biotechnology Information
6 *
7 * This software/database is a "United States Government Work" under the
8 * terms of the United States Copyright Act. It was written as part of
9 * the author's official duties as a United States Government employee and
10 * thus cannot be copyrighted. This software/database is freely available
11 * to the public for use. The National Library of Medicine and the U.S.
12 * Government have not placed any restriction on its use or reproduction.
13 *
14 * Although all reasonable efforts have been taken to ensure the accuracy
15 * and reliability of the software and data, the NLM and the U.S.
16 * Government do not and cannot warrant the performance or results that
17 * may be obtained by using this software or data. The NLM and the U.S.
18 * Government disclaim all warranties, express or implied, including
19 * warranties of performance, merchantability or fitness for any particular
20 * purpose.
21 *
22 * Please cite the author in any work or product based on this material.
23 *
24 * ===========================================================================
25 *
26 * Author: Eugene Vasilchenko, Aleksey Grichenko
27 *
28 * File Description: PSG data loader
29 *
30 * ===========================================================================
31 */
32
33 #include <ncbi_pch.hpp>
34 #include <corelib/ncbistd.hpp>
35
36 #include <objects/general/general__.hpp>
37 #include <objects/seqloc/Seq_id.hpp>
38 #include <objects/seq/seq__.hpp>
39 #include <objects/seqres/seqres__.hpp>
40
41 #include <objmgr/impl/data_source.hpp>
42 #include <objmgr/impl/tse_loadlock.hpp>
43 #include <objmgr/impl/tse_chunk_info.hpp>
44 #include <objmgr/data_loader_factory.hpp>
45 #include <corelib/plugin_manager_impl.hpp>
46 #include <corelib/plugin_manager_store.hpp>
47
48 #include <objtools/data_loaders/genbank/psg_loader.hpp>
49 #include <objtools/data_loaders/genbank/impl/psg_loader_impl.hpp>
50
51 #if defined(HAVE_PSG_LOADER)
52
53 BEGIN_NCBI_SCOPE
54 BEGIN_SCOPE(objects)
55
56 class CDataLoader;
57
58 /////////////////////////////////////////////////////////////////////////////
59 // CPSGDataLoader
60 /////////////////////////////////////////////////////////////////////////////
61
62
// Loader name strings. Neither macro is referenced in the visible part of
// this file.
#define PSGLOADER_NAME     "GBLOADER"
#define PSGLOADER_HUP_NAME "GBLOADER-HUP"

// Driver name that CPSG_DataLoaderCF passes to its CDataLoaderFactory base.
const char kDataLoader_PSG_DriverName[] = "psg";
67
RegisterInObjectManager(CObjectManager & om,const CGBLoaderParams & params,CObjectManager::EIsDefault is_default,CObjectManager::TPriority priority)68 CPSGDataLoader::TRegisterLoaderInfo CPSGDataLoader::RegisterInObjectManager(
69 CObjectManager& om,
70 const CGBLoaderParams& params,
71 CObjectManager::EIsDefault is_default,
72 CObjectManager::TPriority priority)
73 {
74 TMaker maker(params);
75 CDataLoader::RegisterInObjectManager(om, maker, is_default, priority);
76 return ConvertRegInfo(maker.GetRegisterInfo());
77 }
78
79
RegisterInObjectManager(CObjectManager & om,CObjectManager::EIsDefault is_default,CObjectManager::TPriority priority)80 CPSGDataLoader::TRegisterLoaderInfo CPSGDataLoader::RegisterInObjectManager(
81 CObjectManager& om,
82 CObjectManager::EIsDefault is_default,
83 CObjectManager::TPriority priority)
84 {
85 CGBLoaderParams params;
86 TMaker maker(params);
87 CDataLoader::RegisterInObjectManager(om, maker, is_default, priority);
88 return ConvertRegInfo(maker.GetRegisterInfo());
89 }
90
91
RegisterInObjectManager(CObjectManager & om,const TParamTree & param_tree,CObjectManager::EIsDefault is_default,CObjectManager::TPriority priority)92 CPSGDataLoader::TRegisterLoaderInfo CPSGDataLoader::RegisterInObjectManager(
93 CObjectManager& om,
94 const TParamTree& param_tree,
95 CObjectManager::EIsDefault is_default,
96 CObjectManager::TPriority priority)
97 {
98 CGBLoaderParams params(¶m_tree);
99 TMaker maker(params);
100 CDataLoader::RegisterInObjectManager(om, maker, is_default, priority);
101 return ConvertRegInfo(maker.GetRegisterInfo());
102 }
103
104
CPSGDataLoader(const string & loader_name,const CGBLoaderParams & params)105 CPSGDataLoader::CPSGDataLoader(const string& loader_name,
106 const CGBLoaderParams& params)
107 : CGBDataLoader(loader_name, params)
108 {
109 m_Impl.Reset(new CPSGDataLoader_Impl(params));
110 }
111
112
~CPSGDataLoader(void)113 CPSGDataLoader::~CPSGDataLoader(void)
114 {
115 }
116
117
GetBlobId(const CSeq_id_Handle & idh)118 CDataLoader::TBlobId CPSGDataLoader::GetBlobId(const CSeq_id_Handle& idh)
119 {
120 return TBlobId(m_Impl->GetBlobId(idh).GetPointerOrNull());
121 }
122
123
124 CDataLoader::TBlobId
GetBlobIdFromString(const string & str) const125 CPSGDataLoader::GetBlobIdFromString(const string& str) const
126 {
127 return TBlobId(new CPsgBlobId(str));
128 }
129
130
CanGetBlobById(void) const131 bool CPSGDataLoader::CanGetBlobById(void) const
132 {
133 return true;
134 }
135
136
137 CDataLoader::TTSE_LockSet
GetRecords(const CSeq_id_Handle & idh,EChoice choice)138 CPSGDataLoader::GetRecords(const CSeq_id_Handle& idh,
139 EChoice choice)
140 {
141 return m_Impl->GetRecords(GetDataSource(), idh, choice);
142 }
143
144
GetOrphanAnnotRecordsNA(const CSeq_id_Handle & idh,const SAnnotSelector * sel,TProcessedNAs * processed_nas)145 CPSGDataLoader::TTSE_LockSet CPSGDataLoader::GetOrphanAnnotRecordsNA(const CSeq_id_Handle& idh,
146 const SAnnotSelector* sel,
147 TProcessedNAs* processed_nas)
148 {
149 return CDataLoader::TTSE_LockSet();
150 }
151
152
GetExternalAnnotRecordsNA(const CSeq_id_Handle & idh,const SAnnotSelector * sel,TProcessedNAs * processed_nas)153 CPSGDataLoader::TTSE_LockSet CPSGDataLoader::GetExternalAnnotRecordsNA(const CSeq_id_Handle& idh,
154 const SAnnotSelector* sel,
155 TProcessedNAs* processed_nas)
156 {
157 return m_Impl->GetAnnotRecordsNA(GetDataSource(), idh, sel, processed_nas);
158 }
159
160
GetExternalAnnotRecordsNA(const CBioseq_Info & bioseq,const SAnnotSelector * sel,TProcessedNAs * processed_nas)161 CPSGDataLoader::TTSE_LockSet CPSGDataLoader::GetExternalAnnotRecordsNA(const CBioseq_Info& bioseq,
162 const SAnnotSelector* sel,
163 TProcessedNAs* processed_nas)
164 {
165 TTSE_LockSet ret;
166 ITERATE(CBioseq_Info::TId, it, bioseq.GetId()) {
167 TTSE_LockSet ret2 = m_Impl->GetAnnotRecordsNA(GetDataSource(), *it, sel, processed_nas);
168 if (!ret2.empty()) {
169 ret.swap(ret2);
170 break;
171 }
172 }
173 return ret;
174 }
175
176
GetChunk(TChunk chunk)177 void CPSGDataLoader::GetChunk(TChunk chunk)
178 {
179 m_Impl->LoadChunk(GetDataSource(), *chunk);
180 }
181
182
GetChunks(const TChunkSet & chunks)183 void CPSGDataLoader::GetChunks(const TChunkSet& chunks)
184 {
185 m_Impl->LoadChunks(GetDataSource(), chunks);
186 }
187
188
GetBlobs(TTSE_LockSets & tse_sets)189 void CPSGDataLoader::GetBlobs(TTSE_LockSets& tse_sets)
190 {
191 m_Impl->GetBlobs(GetDataSource(), tse_sets);
192 }
193
194
195 CDataLoader::TTSE_Lock
GetBlobById(const TBlobId & blob_id)196 CPSGDataLoader::GetBlobById(const TBlobId& blob_id)
197 {
198 return m_Impl->GetBlobById(GetDataSource(),
199 dynamic_cast<const CPsgBlobId&>(*blob_id));
200 }
201
202
GetIds(const CSeq_id_Handle & idh,TIds & ids)203 void CPSGDataLoader::GetIds(const CSeq_id_Handle& idh, TIds& ids)
204 {
205 m_Impl->GetIds(idh, ids);
206 }
207
208
209 CDataLoader::SGiFound
GetGiFound(const CSeq_id_Handle & idh)210 CPSGDataLoader::GetGiFound(const CSeq_id_Handle& idh)
211 {
212 return m_Impl->GetGi(idh);
213 }
214
215
216 CDataLoader::SAccVerFound
GetAccVerFound(const CSeq_id_Handle & idh)217 CPSGDataLoader::GetAccVerFound(const CSeq_id_Handle& idh)
218 {
219 return m_Impl->GetAccVer(idh);
220 }
221
222
GetTaxId(const CSeq_id_Handle & idh)223 TTaxId CPSGDataLoader::GetTaxId(const CSeq_id_Handle& idh)
224 {
225 auto taxid = m_Impl->GetTaxId(idh);
226 return taxid != INVALID_TAX_ID ? taxid : CDataLoader::GetTaxId(idh);
227 }
228
229
GetSequenceLength(const CSeq_id_Handle & idh)230 TSeqPos CPSGDataLoader::GetSequenceLength(const CSeq_id_Handle& idh)
231 {
232 return m_Impl->GetSequenceLength(idh);
233 }
234
235
236 CDataLoader::SHashFound
GetSequenceHashFound(const CSeq_id_Handle & idh)237 CPSGDataLoader::GetSequenceHashFound(const CSeq_id_Handle& idh)
238 {
239 return m_Impl->GetSequenceHash(idh);
240 }
241
242
243 CDataLoader::STypeFound
GetSequenceTypeFound(const CSeq_id_Handle & idh)244 CPSGDataLoader::GetSequenceTypeFound(const CSeq_id_Handle& idh)
245 {
246 return m_Impl->GetSequenceType(idh);
247 }
248
249
GetSequenceState(const CSeq_id_Handle & idh)250 int CPSGDataLoader::GetSequenceState(const CSeq_id_Handle& idh)
251 {
252 return m_Impl->GetSequenceState(idh);
253 }
254
255
DropTSE(CRef<CTSE_Info> tse_info)256 void CPSGDataLoader::DropTSE(CRef<CTSE_Info> tse_info)
257 {
258 m_Impl->DropTSE(dynamic_cast<const CPsgBlobId&>(*tse_info->GetBlobId()));
259 }
260
261
GetAccVers(const TIds & ids,TLoaded & loaded,TIds & ret)262 void CPSGDataLoader::GetAccVers(const TIds& ids, TLoaded& loaded, TIds& ret)
263 {
264 m_Impl->GetAccVers(ids, loaded, ret);
265 }
266
267
GetGis(const TIds & ids,TLoaded & loaded,TGis & ret)268 void CPSGDataLoader::GetGis(const TIds& ids, TLoaded& loaded, TGis& ret)
269 {
270 m_Impl->GetGis(ids, loaded, ret);
271 }
272
273
274 CGBDataLoader::TNamedAnnotNames
GetNamedAnnotAccessions(const CSeq_id_Handle & sih)275 CPSGDataLoader::GetNamedAnnotAccessions(const CSeq_id_Handle& sih)
276 {
277 TNamedAnnotNames names;
278
279 /*
280 CGBReaderRequestResult result(this, sih);
281 SAnnotSelector sel;
282 sel.IncludeNamedAnnotAccession("NA*");
283 CLoadLockBlobIds blobs(result, sih, &sel);
284 m_Dispatcher->LoadSeq_idBlob_ids(result, sih, &sel);
285 _ASSERT(blobs.IsLoaded());
286
287 CFixedBlob_ids blob_ids = blobs.GetBlob_ids();
288 if ((blob_ids.GetState() & CBioseq_Handle::fState_no_data) != 0) {
289 if (blob_ids.GetState() == CBioseq_Handle::fState_no_data) {
290 // default state - return empty name set
291 return names;
292 }
293 NCBI_THROW2(CBlobStateException, eBlobStateError,
294 "blob state error for " + sih.AsString(),
295 blob_ids.GetState());
296 }
297
298 ITERATE(CFixedBlob_ids, it, blob_ids) {
299 const CBlob_Info& info = *it;
300 if (!info.IsSetAnnotInfo()) {
301 continue;
302 }
303 CConstRef<CBlob_Annot_Info> annot_info = info.GetAnnotInfo();
304 ITERATE(CBlob_Annot_Info::TNamedAnnotNames, jt,
305 annot_info->GetNamedAnnotNames()) {
306 names.insert(*jt);
307 }
308 }
309 */
310
311 return names;
312 }
313
314
315 CGBDataLoader::TNamedAnnotNames
GetNamedAnnotAccessions(const CSeq_id_Handle & sih,const string & named_acc)316 CPSGDataLoader::GetNamedAnnotAccessions(const CSeq_id_Handle& sih,
317 const string& named_acc)
318 {
319 TNamedAnnotNames names;
320
321 /*
322 CGBReaderRequestResult result(this, sih);
323 SAnnotSelector sel;
324 if (!ExtractZoomLevel(named_acc, 0, 0)) {
325 sel.IncludeNamedAnnotAccession(CombineWithZoomLevel(named_acc, -1));
326 }
327 else {
328 sel.IncludeNamedAnnotAccession(named_acc);
329 }
330 CLoadLockBlobIds blobs(result, sih, &sel);
331 m_Dispatcher->LoadSeq_idBlob_ids(result, sih, &sel);
332 _ASSERT(blobs.IsLoaded());
333
334 CFixedBlob_ids blob_ids = blobs.GetBlob_ids();
335 if ((blob_ids.GetState() & CBioseq_Handle::fState_no_data) != 0) {
336 if (blob_ids.GetState() == CBioseq_Handle::fState_no_data) {
337 // default state - return empty name set
338 return names;
339 }
340 NCBI_THROW2(CBlobStateException, eBlobStateError,
341 "blob state error for " + sih.AsString(),
342 blob_ids.GetState());
343 }
344
345 ITERATE(CFixedBlob_ids, it, blob_ids) {
346 const CBlob_Info& info = *it;
347 if (!info.IsSetAnnotInfo()) {
348 continue;
349 }
350 CConstRef<CBlob_Annot_Info> annot_info = info.GetAnnotInfo();
351 ITERATE(CBlob_Annot_Info::TNamedAnnotNames, jt,
352 annot_info->GetNamedAnnotNames()) {
353 names.insert(*jt);
354 }
355 }
356 */
357
358 return names;
359 }
360
361
ConvertRegInfo(const TMaker::TRegisterInfo & info)362 CGBDataLoader::TRegisterLoaderInfo CPSGDataLoader::ConvertRegInfo(const TMaker::TRegisterInfo& info)
363 {
364 TRegisterLoaderInfo ret;
365 ret.Set(info.GetLoader(), info.IsCreated());
366 return ret;
367 }
368
369
370 END_SCOPE(objects)
371
372 // ===========================================================================
373
374 USING_SCOPE(objects);
375
376 class CPSG_DataLoaderCF : public CDataLoaderFactory
377 {
378 public:
CPSG_DataLoaderCF(void)379 CPSG_DataLoaderCF(void)
380 : CDataLoaderFactory(objects::kDataLoader_PSG_DriverName) {}
~CPSG_DataLoaderCF(void)381 virtual ~CPSG_DataLoaderCF(void) {}
382
383 protected:
384 virtual CDataLoader* CreateAndRegister(
385 CObjectManager& om,
386 const TPluginManagerParamTree* params) const;
387 };
388
389
CreateAndRegister(CObjectManager & om,const TPluginManagerParamTree * params) const390 CDataLoader* CPSG_DataLoaderCF::CreateAndRegister(
391 CObjectManager& om,
392 const TPluginManagerParamTree* params) const
393 {
394 if ( !ValidParams(params) ) {
395 // Use constructor without arguments
396 return CPSGDataLoader::RegisterInObjectManager(om).GetLoader();
397 }
398 // IsDefault and Priority arguments may be specified
399 return CPSGDataLoader::RegisterInObjectManager(
400 om,
401 GetIsDefault(params),
402 GetPriority(params)).GetLoader();
403 }
404
405 END_NCBI_SCOPE
406
407 #endif // HAVE_PSG_LOADER
408