1 /*  $Id: cdd_processor.cpp 629860 2021-04-22 16:38:08Z ivanov $
2  * ===========================================================================
3  *
4  *                            PUBLIC DOMAIN NOTICE
5  *               National Center for Biotechnology Information
6  *
7  *  This software/database is a "United States Government Work" under the
8  *  terms of the United States Copyright Act.  It was written as part of
9  *  the author's official duties as a United States Government employee and
10  *  thus cannot be copyrighted.  This software/database is freely available
11  *  to the public for use. The National Library of Medicine and the U.S.
12  *  Government have not placed any restriction on its use or reproduction.
13  *
14  *  Although all reasonable efforts have been taken to ensure the accuracy
15  *  and reliability of the software and data, the NLM and the U.S.
16  *  Government do not and cannot warrant the performance or results that
17  *  may be obtained by using this software or data. The NLM and the U.S.
18  *  Government disclaim all warranties, express or implied, including
19  *  warranties of performance, merchantability or fitness for any particular
20  *  purpose.
21  *
22  *  Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Authors: Aleksey Grichenko
27  *
28  * File Description: processor for data from CDD
29  *
30  */
31 
32 #include <ncbi_pch.hpp>
33 
34 #include "cdd_processor.hpp"
35 #include "pubseq_gateway.hpp"
36 #include "pubseq_gateway_convert_utils.hpp"
37 #include <objtools/data_loaders/cdd/cdd_access/cdd_client.hpp>
38 #include <objtools/data_loaders/cdd/cdd_access/CDD_Rep_Get_Blob_By_Seq_Id.hpp>
39 #include <objtools/data_loaders/cdd/cdd_access/CDD_Reply_Get_Blob_Id.hpp>
40 #include <objects/id2/ID2_Blob_Id.hpp>
41 #include <objects/seqsplit/ID2S_Seq_annot_Info.hpp>
42 #include <objects/seqsplit/ID2S_Feat_type_Info.hpp>
43 #include <objects/seqsplit/ID2S_Seq_loc.hpp>
44 #include <objects/seqfeat/SeqFeatData.hpp>
45 #include <objects/seq/Seq_annot.hpp>
46 #include <objects/seqset/seqset__.hpp>
47 
48 BEGIN_NCBI_NAMESPACE;
49 BEGIN_NAMESPACE(psg);
50 BEGIN_NAMESPACE(cdd);
51 
52 USING_SCOPE(objects);
53 
54 static const string kCDDAnnotName = "CDD";
55 static const string kCDDProcessorName = "CDD";
56 const CID2_Blob_Id::TSat kCDDSat = 8087;
57 
CPSGS_CDDProcessor(void)58 CPSGS_CDDProcessor::CPSGS_CDDProcessor(void)
59     : m_ClientPool(new CCDDClientPool()),
60       m_Status(ePSGS_InProgress)
61 {
62 }
63 
CPSGS_CDDProcessor(shared_ptr<CCDDClientPool> client_pool,shared_ptr<CPSGS_Request> request,shared_ptr<CPSGS_Reply> reply,TProcessorPriority priority)64 CPSGS_CDDProcessor::CPSGS_CDDProcessor(
65     shared_ptr<CCDDClientPool> client_pool,
66     shared_ptr<CPSGS_Request> request,
67     shared_ptr<CPSGS_Reply> reply,
68     TProcessorPriority priority)
69     : m_ClientPool(client_pool),
70       m_Status(ePSGS_InProgress)
71 {
72     m_Request = request;
73     m_Reply = reply;
74     m_Priority = priority;
75 }
76 
~CPSGS_CDDProcessor(void)77 CPSGS_CDDProcessor::~CPSGS_CDDProcessor(void)
78 {
79 }
80 
81 
82 IPSGS_Processor*
CreateProcessor(shared_ptr<CPSGS_Request> request,shared_ptr<CPSGS_Reply> reply,TProcessorPriority priority) const83 CPSGS_CDDProcessor::CreateProcessor(shared_ptr<CPSGS_Request> request,
84                                     shared_ptr<CPSGS_Reply> reply,
85                                     TProcessorPriority priority) const
86 {
87     auto req_type = request->GetRequestType();
88     if (req_type != CPSGS_Request::ePSGS_AnnotationRequest &&
89         req_type != CPSGS_Request::ePSGS_BlobBySatSatKeyRequest) return nullptr;
90 
91     auto app = CPubseqGatewayApp::GetInstance();
92     bool enabled = app->GetCDDProcessorsEnabled();
93     if ( enabled ) {
94         for (const auto& name : request->GetRequest<SPSGS_RequestBase>().m_DisabledProcessors ) {
95             if ( NStr::EqualNocase(name, kCDDProcessorName) ) {
96                 enabled = false;
97                 break;
98             }
99         }
100     }
101     else {
102         for (const auto& name : request->GetRequest<SPSGS_RequestBase>().m_EnabledProcessors ) {
103             if ( NStr::EqualNocase(name, kCDDProcessorName) ) {
104                 enabled = true;
105                 break;
106             }
107         }
108     }
109     if ( !enabled ) return nullptr;
110 
111     if (req_type == CPSGS_Request::ePSGS_AnnotationRequest &&
112         !x_CanProcessAnnotRequest(request->GetRequest<SPSGS_AnnotRequest>(), priority))
113         return nullptr;
114     if (req_type == CPSGS_Request::ePSGS_BlobBySatSatKeyRequest &&
115         !x_CanProcessBlobRequest(request->GetRequest<SPSGS_BlobBySatSatKeyRequest>()))
116         return nullptr;
117 
118     return new CPSGS_CDDProcessor(m_ClientPool, request, reply, priority);
119 }
120 
121 
GetName() const122 string CPSGS_CDDProcessor::GetName() const
123 {
124     return kCDDProcessorName;
125 }
126 
127 
Process()128 void CPSGS_CDDProcessor::Process()
129 {
130     CRequestContextResetter     context_resetter;
131     GetRequest()->SetRequestContext();
132 
133     auto req_type = GetRequest()->GetRequestType();
134     switch (req_type) {
135     case CPSGS_Request::ePSGS_AnnotationRequest:
136         x_ProcessResolveRequest();
137         break;
138     case CPSGS_Request::ePSGS_BlobBySatSatKeyRequest:
139         x_ProcessGetBlobRequest();
140         break;
141     default:
142         x_Finish(ePSGS_Error);
143         break;
144     }
145 }
146 
147 
x_ProcessResolveRequest(void)148 void CPSGS_CDDProcessor::x_ProcessResolveRequest(void)
149 {
150     SPSGS_AnnotRequest& annot_request = GetRequest()->GetRequest<SPSGS_AnnotRequest>();
151     if ( !x_NameIncluded(annot_request.GetNotProcessedName(m_Priority)) ) {
152         x_Finish(ePSGS_NotFound);
153         return;
154     }
155     CSeq_id_Handle idh;
156     try {
157         CSeq_id id(annot_request.m_SeqId);
158         idh = CSeq_id_Handle::GetHandle(id);
159     }
160     catch (exception& e) {
161         ERR_POST("Bad seq-id: " << annot_request.m_SeqId);
162         x_Finish(ePSGS_Error);
163         return;
164     }
165 
166     if (annot_request.m_TSEOption == SPSGS_BlobRequestBase::EPSGS_TSEOption::ePSGS_SmartTSE ||
167         annot_request.m_TSEOption == SPSGS_BlobRequestBase::EPSGS_TSEOption::ePSGS_WholeTSE ||
168         annot_request.m_TSEOption == SPSGS_BlobRequestBase::EPSGS_TSEOption::ePSGS_OrigTSE) {
169         // Send whole TSE.
170         CCDDClientPool::TSeq_idSet ids;
171         ids.insert(idh);
172         CCDDClientPool::SCDDBlob cdd_blob = m_ClientPool->GetBlobBySeq_ids(ids);
173         if ( !cdd_blob.info  ||  !cdd_blob.data ) {
174             x_Finish(ePSGS_NotFound);
175             return;
176         }
177         if (SignalStartProcessing() == ePSGS_Cancel) {
178             x_Finish(ePSGS_Cancelled);
179             return;
180         }
181         x_SendAnnotInfo(*cdd_blob.info);
182         x_SendAnnot(cdd_blob.info->GetBlob_id(), cdd_blob.data);
183     }
184     else {
185         // Send annot info only.
186         CCDDClientPool::TBlobInfo blob_info = m_ClientPool->GetBlobIdBySeq_id(idh);
187         if ( !blob_info ) {
188             x_Finish(ePSGS_NotFound);
189             return;
190         }
191         if (SignalStartProcessing() == ePSGS_Cancel) {
192             x_Finish(ePSGS_Cancelled);
193             return;
194         }
195         x_SendAnnotInfo(*blob_info);
196     }
197     x_Finish(ePSGS_Found);
198 }
199 
200 
x_ProcessGetBlobRequest(void)201 void CPSGS_CDDProcessor::x_ProcessGetBlobRequest(void)
202 {
203     SPSGS_BlobBySatSatKeyRequest blob_request =
204         GetRequest()->GetRequest<SPSGS_BlobBySatSatKeyRequest>();
205     CRef<CCDDClientPool::TBlobId> blob_id =
206         CCDDClientPool::StringToBlobId(blob_request.m_BlobId.GetId());
207     if ( !blob_id ) {
208         x_Finish(ePSGS_NotFound);
209         return;
210     }
211     CCDDClientPool::TBlobData annot = m_ClientPool->GetBlobByBlobId(*blob_id);
212     if ( !annot ) {
213         x_Finish(ePSGS_NotFound);
214         return;
215     }
216     if (SignalStartProcessing() == ePSGS_Cancel) {
217         x_Finish(ePSGS_Cancelled);
218         return;
219     }
220 
221     x_SendAnnot(*blob_id, annot);
222 
223     x_Finish(ePSGS_Found);
224 }
225 
226 
x_SendAnnotInfo(const CCDD_Reply_Get_Blob_Id & blob_info)227 void CPSGS_CDDProcessor::x_SendAnnotInfo(const CCDD_Reply_Get_Blob_Id& blob_info)
228 {
229     SPSGS_AnnotRequest& annot_request = GetRequest()->GetRequest<SPSGS_AnnotRequest>();
230     const CID2_Blob_Id& blob_id = blob_info.GetBlob_id();
231     CJsonNode       json(CJsonNode::NewObjectNode());
232     json.SetString("blob_id", CCDDClientPool::BlobIdToString(blob_id));
233     if ( blob_id.IsSetVersion() ) {
234         json.SetInteger("last_modified", blob_id.GetVersion()*60000);
235     }
236 
237     CRef<CID2S_Seq_annot_Info> annot_info(new CID2S_Seq_annot_Info);
238     annot_info->SetName(kCDDAnnotName);
239     CRef<CID2S_Feat_type_Info> feat_info(new CID2S_Feat_type_Info);
240     feat_info->SetType(CSeqFeatData::e_Region);
241     feat_info->SetSubtypes().push_back(CSeqFeatData::eSubtype_region);
242     annot_info->SetFeat().push_back(feat_info);
243     feat_info.Reset(new CID2S_Feat_type_Info);
244     feat_info->SetType(CSeqFeatData::e_Site);
245     feat_info->SetSubtypes().push_back(CSeqFeatData::eSubtype_site);
246     annot_info->SetFeat().push_back(feat_info);
247 
248     const CSeq_id& annot_id = blob_info.GetSeq_id();
249     if ( annot_id.IsGi() ) {
250         annot_info->SetSeq_loc().SetWhole_gi(annot_id.GetGi());
251     }
252     else {
253         annot_info->SetSeq_loc().SetWhole_seq_id().Assign(annot_id);
254     }
255 
256     ostringstream annot_str;
257     annot_str << MSerial_AsnBinary << *annot_info;
258     json.SetString("seq_annot_info", NStr::Base64Encode(annot_str.str(), 0));
259 
260     GetReply()->PrepareNamedAnnotationData(kCDDAnnotName, kCDDProcessorName,
261         json.Repr(CJsonNode::fStandardJson));
262     annot_request.RegisterProcessedName(GetPriority(), kCDDAnnotName);
263 }
264 
265 
x_SendAnnot(const CID2_Blob_Id & id2_blob_id,CRef<CSeq_annot> & annot)266 void CPSGS_CDDProcessor::x_SendAnnot(const CID2_Blob_Id& id2_blob_id, CRef<CSeq_annot>& annot)
267 {
268     string psg_blob_id = CCDDClientPool::BlobIdToString(id2_blob_id);
269     CRef<CSeq_entry> entry(new CSeq_entry);
270     entry->SetSet().SetSeq_set();
271     entry->SetAnnot().push_back(annot);
272     ostringstream blob_str;
273     blob_str << MSerial_AsnBinary << *entry;
274     string blob_data = blob_str.str();
275 
276     CBlobRecord blob_props;
277     if (id2_blob_id.IsSetVersion()) {
278         blob_props.SetModified(int64_t(id2_blob_id.GetVersion()*60000));
279     }
280     blob_props.SetNChunks(1);
281     size_t item_id = GetReply()->GetItemId();
282     GetReply()->PrepareBlobPropData(
283         item_id,
284         kCDDProcessorName,
285         psg_blob_id,
286         ToJson(blob_props).Repr(CJsonNode::fStandardJson));
287     GetReply()->PrepareBlobPropCompletion(item_id, kCDDProcessorName, 2);
288 
289     item_id = GetReply()->GetItemId();
290     GetReply()->PrepareBlobData(
291         item_id,
292         kCDDProcessorName,
293         psg_blob_id,
294         (const unsigned char*)blob_data.data(), blob_data.size(), 0);
295     GetReply()->PrepareBlobCompletion(item_id, kCDDProcessorName, 2);
296 }
297 
298 
Cancel()299 void CPSGS_CDDProcessor::Cancel()
300 {
301     m_Status = ePSGS_Cancelled;
302 }
303 
304 
GetStatus()305 IPSGS_Processor::EPSGS_Status CPSGS_CDDProcessor::GetStatus()
306 {
307     return m_Status;
308 }
309 
310 
x_CanProcessAnnotRequest(SPSGS_AnnotRequest & annot_request,TProcessorPriority priority) const311 bool CPSGS_CDDProcessor::x_CanProcessAnnotRequest(SPSGS_AnnotRequest& annot_request,
312                                                   TProcessorPriority priority) const
313 {
314     CSeq_id id(annot_request.m_SeqId);
315     if (!id.IsGi() && !id.GetTextseq_Id()) return false;
316     if (!m_ClientPool->IsValidId(id)) return false;
317     return x_NameIncluded(annot_request.GetNotProcessedName(priority));
318 }
319 
320 
x_CanProcessBlobRequest(SPSGS_BlobBySatSatKeyRequest & blob_request) const321 bool CPSGS_CDDProcessor::x_CanProcessBlobRequest(SPSGS_BlobBySatSatKeyRequest& blob_request) const
322 {
323     CRef<CCDDClientPool::TBlobId> blob_id =
324         CCDDClientPool::StringToBlobId(blob_request.m_BlobId.GetId());
325     return blob_id && blob_id->GetSat() == kCDDSat;
326 }
327 
328 
x_NameIncluded(const vector<string> & names) const329 bool CPSGS_CDDProcessor::x_NameIncluded(const vector<string>& names) const
330 {
331     for ( auto& name : names ) {
332         if ( NStr::EqualNocase(name, kCDDAnnotName) ) return true;
333     }
334     return false;
335 }
336 
337 
x_Finish(EPSGS_Status status)338 void CPSGS_CDDProcessor::x_Finish(EPSGS_Status status)
339 {
340     if (status != ePSGS_Cancelled) m_Status = status;
341     SignalFinishProcessing();
342 }
343 
344 
345 END_NAMESPACE(cdd);
346 END_NAMESPACE(psg);
347 END_NCBI_NAMESPACE;
348