1 /* $Id: cdd_processor.cpp 629860 2021-04-22 16:38:08Z ivanov $
2 * ===========================================================================
3 *
4 * PUBLIC DOMAIN NOTICE
5 * National Center for Biotechnology Information
6 *
7 * This software/database is a "United States Government Work" under the
8 * terms of the United States Copyright Act. It was written as part of
9 * the author's official duties as a United States Government employee and
10 * thus cannot be copyrighted. This software/database is freely available
11 * to the public for use. The National Library of Medicine and the U.S.
12 * Government have not placed any restriction on its use or reproduction.
13 *
14 * Although all reasonable efforts have been taken to ensure the accuracy
15 * and reliability of the software and data, the NLM and the U.S.
16 * Government do not and cannot warrant the performance or results that
17 * may be obtained by using this software or data. The NLM and the U.S.
18 * Government disclaim all warranties, express or implied, including
19 * warranties of performance, merchantability or fitness for any particular
20 * purpose.
21 *
22 * Please cite the author in any work or product based on this material.
23 *
24 * ===========================================================================
25 *
26 * Authors: Aleksey Grichenko
27 *
28 * File Description: processor for data from CDD
29 *
30 */
31
32 #include <ncbi_pch.hpp>
33
34 #include "cdd_processor.hpp"
35 #include "pubseq_gateway.hpp"
36 #include "pubseq_gateway_convert_utils.hpp"
37 #include <objtools/data_loaders/cdd/cdd_access/cdd_client.hpp>
38 #include <objtools/data_loaders/cdd/cdd_access/CDD_Rep_Get_Blob_By_Seq_Id.hpp>
39 #include <objtools/data_loaders/cdd/cdd_access/CDD_Reply_Get_Blob_Id.hpp>
40 #include <objects/id2/ID2_Blob_Id.hpp>
41 #include <objects/seqsplit/ID2S_Seq_annot_Info.hpp>
42 #include <objects/seqsplit/ID2S_Feat_type_Info.hpp>
43 #include <objects/seqsplit/ID2S_Seq_loc.hpp>
44 #include <objects/seqfeat/SeqFeatData.hpp>
45 #include <objects/seq/Seq_annot.hpp>
46 #include <objects/seqset/seqset__.hpp>
47
48 BEGIN_NCBI_NAMESPACE;
49 BEGIN_NAMESPACE(psg);
50 BEGIN_NAMESPACE(cdd);
51
52 USING_SCOPE(objects);
53
/// Annotation name handled by this processor: matched (case-insensitively)
/// against the requested annotation names and reported back in replies.
static const string kCDDAnnotName = "CDD";
/// Name under which this processor identifies itself: returned by GetName(),
/// matched against the per-request enabled/disabled processor lists and
/// attached to every reply item it prepares.
static const string kCDDProcessorName = "CDD";
/// Sat value a parsed blob id must carry for a blob-by-id request to be
/// accepted by this processor.
const CID2_Blob_Id::TSat kCDDSat = 8087;
57
CPSGS_CDDProcessor(void)58 CPSGS_CDDProcessor::CPSGS_CDDProcessor(void)
59 : m_ClientPool(new CCDDClientPool()),
60 m_Status(ePSGS_InProgress)
61 {
62 }
63
CPSGS_CDDProcessor(shared_ptr<CCDDClientPool> client_pool,shared_ptr<CPSGS_Request> request,shared_ptr<CPSGS_Reply> reply,TProcessorPriority priority)64 CPSGS_CDDProcessor::CPSGS_CDDProcessor(
65 shared_ptr<CCDDClientPool> client_pool,
66 shared_ptr<CPSGS_Request> request,
67 shared_ptr<CPSGS_Reply> reply,
68 TProcessorPriority priority)
69 : m_ClientPool(client_pool),
70 m_Status(ePSGS_InProgress)
71 {
72 m_Request = request;
73 m_Reply = reply;
74 m_Priority = priority;
75 }
76
~CPSGS_CDDProcessor(void)77 CPSGS_CDDProcessor::~CPSGS_CDDProcessor(void)
78 {
79 }
80
81
82 IPSGS_Processor*
CreateProcessor(shared_ptr<CPSGS_Request> request,shared_ptr<CPSGS_Reply> reply,TProcessorPriority priority) const83 CPSGS_CDDProcessor::CreateProcessor(shared_ptr<CPSGS_Request> request,
84 shared_ptr<CPSGS_Reply> reply,
85 TProcessorPriority priority) const
86 {
87 auto req_type = request->GetRequestType();
88 if (req_type != CPSGS_Request::ePSGS_AnnotationRequest &&
89 req_type != CPSGS_Request::ePSGS_BlobBySatSatKeyRequest) return nullptr;
90
91 auto app = CPubseqGatewayApp::GetInstance();
92 bool enabled = app->GetCDDProcessorsEnabled();
93 if ( enabled ) {
94 for (const auto& name : request->GetRequest<SPSGS_RequestBase>().m_DisabledProcessors ) {
95 if ( NStr::EqualNocase(name, kCDDProcessorName) ) {
96 enabled = false;
97 break;
98 }
99 }
100 }
101 else {
102 for (const auto& name : request->GetRequest<SPSGS_RequestBase>().m_EnabledProcessors ) {
103 if ( NStr::EqualNocase(name, kCDDProcessorName) ) {
104 enabled = true;
105 break;
106 }
107 }
108 }
109 if ( !enabled ) return nullptr;
110
111 if (req_type == CPSGS_Request::ePSGS_AnnotationRequest &&
112 !x_CanProcessAnnotRequest(request->GetRequest<SPSGS_AnnotRequest>(), priority))
113 return nullptr;
114 if (req_type == CPSGS_Request::ePSGS_BlobBySatSatKeyRequest &&
115 !x_CanProcessBlobRequest(request->GetRequest<SPSGS_BlobBySatSatKeyRequest>()))
116 return nullptr;
117
118 return new CPSGS_CDDProcessor(m_ClientPool, request, reply, priority);
119 }
120
121
GetName() const122 string CPSGS_CDDProcessor::GetName() const
123 {
124 return kCDDProcessorName;
125 }
126
127
Process()128 void CPSGS_CDDProcessor::Process()
129 {
130 CRequestContextResetter context_resetter;
131 GetRequest()->SetRequestContext();
132
133 auto req_type = GetRequest()->GetRequestType();
134 switch (req_type) {
135 case CPSGS_Request::ePSGS_AnnotationRequest:
136 x_ProcessResolveRequest();
137 break;
138 case CPSGS_Request::ePSGS_BlobBySatSatKeyRequest:
139 x_ProcessGetBlobRequest();
140 break;
141 default:
142 x_Finish(ePSGS_Error);
143 break;
144 }
145 }
146
147
x_ProcessResolveRequest(void)148 void CPSGS_CDDProcessor::x_ProcessResolveRequest(void)
149 {
150 SPSGS_AnnotRequest& annot_request = GetRequest()->GetRequest<SPSGS_AnnotRequest>();
151 if ( !x_NameIncluded(annot_request.GetNotProcessedName(m_Priority)) ) {
152 x_Finish(ePSGS_NotFound);
153 return;
154 }
155 CSeq_id_Handle idh;
156 try {
157 CSeq_id id(annot_request.m_SeqId);
158 idh = CSeq_id_Handle::GetHandle(id);
159 }
160 catch (exception& e) {
161 ERR_POST("Bad seq-id: " << annot_request.m_SeqId);
162 x_Finish(ePSGS_Error);
163 return;
164 }
165
166 if (annot_request.m_TSEOption == SPSGS_BlobRequestBase::EPSGS_TSEOption::ePSGS_SmartTSE ||
167 annot_request.m_TSEOption == SPSGS_BlobRequestBase::EPSGS_TSEOption::ePSGS_WholeTSE ||
168 annot_request.m_TSEOption == SPSGS_BlobRequestBase::EPSGS_TSEOption::ePSGS_OrigTSE) {
169 // Send whole TSE.
170 CCDDClientPool::TSeq_idSet ids;
171 ids.insert(idh);
172 CCDDClientPool::SCDDBlob cdd_blob = m_ClientPool->GetBlobBySeq_ids(ids);
173 if ( !cdd_blob.info || !cdd_blob.data ) {
174 x_Finish(ePSGS_NotFound);
175 return;
176 }
177 if (SignalStartProcessing() == ePSGS_Cancel) {
178 x_Finish(ePSGS_Cancelled);
179 return;
180 }
181 x_SendAnnotInfo(*cdd_blob.info);
182 x_SendAnnot(cdd_blob.info->GetBlob_id(), cdd_blob.data);
183 }
184 else {
185 // Send annot info only.
186 CCDDClientPool::TBlobInfo blob_info = m_ClientPool->GetBlobIdBySeq_id(idh);
187 if ( !blob_info ) {
188 x_Finish(ePSGS_NotFound);
189 return;
190 }
191 if (SignalStartProcessing() == ePSGS_Cancel) {
192 x_Finish(ePSGS_Cancelled);
193 return;
194 }
195 x_SendAnnotInfo(*blob_info);
196 }
197 x_Finish(ePSGS_Found);
198 }
199
200
x_ProcessGetBlobRequest(void)201 void CPSGS_CDDProcessor::x_ProcessGetBlobRequest(void)
202 {
203 SPSGS_BlobBySatSatKeyRequest blob_request =
204 GetRequest()->GetRequest<SPSGS_BlobBySatSatKeyRequest>();
205 CRef<CCDDClientPool::TBlobId> blob_id =
206 CCDDClientPool::StringToBlobId(blob_request.m_BlobId.GetId());
207 if ( !blob_id ) {
208 x_Finish(ePSGS_NotFound);
209 return;
210 }
211 CCDDClientPool::TBlobData annot = m_ClientPool->GetBlobByBlobId(*blob_id);
212 if ( !annot ) {
213 x_Finish(ePSGS_NotFound);
214 return;
215 }
216 if (SignalStartProcessing() == ePSGS_Cancel) {
217 x_Finish(ePSGS_Cancelled);
218 return;
219 }
220
221 x_SendAnnot(*blob_id, annot);
222
223 x_Finish(ePSGS_Found);
224 }
225
226
x_SendAnnotInfo(const CCDD_Reply_Get_Blob_Id & blob_info)227 void CPSGS_CDDProcessor::x_SendAnnotInfo(const CCDD_Reply_Get_Blob_Id& blob_info)
228 {
229 SPSGS_AnnotRequest& annot_request = GetRequest()->GetRequest<SPSGS_AnnotRequest>();
230 const CID2_Blob_Id& blob_id = blob_info.GetBlob_id();
231 CJsonNode json(CJsonNode::NewObjectNode());
232 json.SetString("blob_id", CCDDClientPool::BlobIdToString(blob_id));
233 if ( blob_id.IsSetVersion() ) {
234 json.SetInteger("last_modified", blob_id.GetVersion()*60000);
235 }
236
237 CRef<CID2S_Seq_annot_Info> annot_info(new CID2S_Seq_annot_Info);
238 annot_info->SetName(kCDDAnnotName);
239 CRef<CID2S_Feat_type_Info> feat_info(new CID2S_Feat_type_Info);
240 feat_info->SetType(CSeqFeatData::e_Region);
241 feat_info->SetSubtypes().push_back(CSeqFeatData::eSubtype_region);
242 annot_info->SetFeat().push_back(feat_info);
243 feat_info.Reset(new CID2S_Feat_type_Info);
244 feat_info->SetType(CSeqFeatData::e_Site);
245 feat_info->SetSubtypes().push_back(CSeqFeatData::eSubtype_site);
246 annot_info->SetFeat().push_back(feat_info);
247
248 const CSeq_id& annot_id = blob_info.GetSeq_id();
249 if ( annot_id.IsGi() ) {
250 annot_info->SetSeq_loc().SetWhole_gi(annot_id.GetGi());
251 }
252 else {
253 annot_info->SetSeq_loc().SetWhole_seq_id().Assign(annot_id);
254 }
255
256 ostringstream annot_str;
257 annot_str << MSerial_AsnBinary << *annot_info;
258 json.SetString("seq_annot_info", NStr::Base64Encode(annot_str.str(), 0));
259
260 GetReply()->PrepareNamedAnnotationData(kCDDAnnotName, kCDDProcessorName,
261 json.Repr(CJsonNode::fStandardJson));
262 annot_request.RegisterProcessedName(GetPriority(), kCDDAnnotName);
263 }
264
265
x_SendAnnot(const CID2_Blob_Id & id2_blob_id,CRef<CSeq_annot> & annot)266 void CPSGS_CDDProcessor::x_SendAnnot(const CID2_Blob_Id& id2_blob_id, CRef<CSeq_annot>& annot)
267 {
268 string psg_blob_id = CCDDClientPool::BlobIdToString(id2_blob_id);
269 CRef<CSeq_entry> entry(new CSeq_entry);
270 entry->SetSet().SetSeq_set();
271 entry->SetAnnot().push_back(annot);
272 ostringstream blob_str;
273 blob_str << MSerial_AsnBinary << *entry;
274 string blob_data = blob_str.str();
275
276 CBlobRecord blob_props;
277 if (id2_blob_id.IsSetVersion()) {
278 blob_props.SetModified(int64_t(id2_blob_id.GetVersion()*60000));
279 }
280 blob_props.SetNChunks(1);
281 size_t item_id = GetReply()->GetItemId();
282 GetReply()->PrepareBlobPropData(
283 item_id,
284 kCDDProcessorName,
285 psg_blob_id,
286 ToJson(blob_props).Repr(CJsonNode::fStandardJson));
287 GetReply()->PrepareBlobPropCompletion(item_id, kCDDProcessorName, 2);
288
289 item_id = GetReply()->GetItemId();
290 GetReply()->PrepareBlobData(
291 item_id,
292 kCDDProcessorName,
293 psg_blob_id,
294 (const unsigned char*)blob_data.data(), blob_data.size(), 0);
295 GetReply()->PrepareBlobCompletion(item_id, kCDDProcessorName, 2);
296 }
297
298
Cancel()299 void CPSGS_CDDProcessor::Cancel()
300 {
301 m_Status = ePSGS_Cancelled;
302 }
303
304
GetStatus()305 IPSGS_Processor::EPSGS_Status CPSGS_CDDProcessor::GetStatus()
306 {
307 return m_Status;
308 }
309
310
x_CanProcessAnnotRequest(SPSGS_AnnotRequest & annot_request,TProcessorPriority priority) const311 bool CPSGS_CDDProcessor::x_CanProcessAnnotRequest(SPSGS_AnnotRequest& annot_request,
312 TProcessorPriority priority) const
313 {
314 CSeq_id id(annot_request.m_SeqId);
315 if (!id.IsGi() && !id.GetTextseq_Id()) return false;
316 if (!m_ClientPool->IsValidId(id)) return false;
317 return x_NameIncluded(annot_request.GetNotProcessedName(priority));
318 }
319
320
x_CanProcessBlobRequest(SPSGS_BlobBySatSatKeyRequest & blob_request) const321 bool CPSGS_CDDProcessor::x_CanProcessBlobRequest(SPSGS_BlobBySatSatKeyRequest& blob_request) const
322 {
323 CRef<CCDDClientPool::TBlobId> blob_id =
324 CCDDClientPool::StringToBlobId(blob_request.m_BlobId.GetId());
325 return blob_id && blob_id->GetSat() == kCDDSat;
326 }
327
328
x_NameIncluded(const vector<string> & names) const329 bool CPSGS_CDDProcessor::x_NameIncluded(const vector<string>& names) const
330 {
331 for ( auto& name : names ) {
332 if ( NStr::EqualNocase(name, kCDDAnnotName) ) return true;
333 }
334 return false;
335 }
336
337
x_Finish(EPSGS_Status status)338 void CPSGS_CDDProcessor::x_Finish(EPSGS_Status status)
339 {
340 if (status != ePSGS_Cancelled) m_Status = status;
341 SignalFinishProcessing();
342 }
343
344
345 END_NAMESPACE(cdd);
346 END_NAMESPACE(psg);
347 END_NCBI_NAMESPACE;
348