1 /* $Id: entrez2_client.cpp 607107 2020-04-30 12:37:23Z grichenk $
2  * ===========================================================================
3  *
4  *                            PUBLIC DOMAIN NOTICE
5  *               National Center for Biotechnology Information
6  *
7  *  This software/database is a "United States Government Work" under the
8  *  terms of the United States Copyright Act.  It was written as part of
9  *  the author's official duties as a United States Government employee and
10  *  thus cannot be copyrighted.  This software/database is freely available
11  *  to the public for use. The National Library of Medicine and the U.S.
12  *  Government have not placed any restriction on its use or reproduction.
13  *
14  *  Although all reasonable efforts have been taken to ensure the accuracy
15  *  and reliability of the software and data, the NLM and the U.S.
16  *  Government do not and cannot warrant the performance or results that
17  *  may be obtained by using this software or data. The NLM and the U.S.
18  *  Government disclaim all warranties, express or implied, including
19  *  warranties of performance, merchantability or fitness for any particular
20  *  purpose.
21  *
22  *  Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Author:  Josh Cherry
27  *
28  * File Description:
29  *   Entrez2 network client
30  *
31  * Remark:
32  *   This code was originally generated by application DATATOOL
33  *   using specifications from the data definition file
34  *   'entrez2.asn'.
35  */
36 
37 // standard includes
38 
39 // generated includes
40 
41 #include <ncbi_pch.hpp>
42 #include <objects/entrez2/entrez2_client.hpp>
43 #include <objects/entrez2/Entrez2_boolean_element.hpp>
44 #include <objects/entrez2/Entrez2_boolean_exp.hpp>
45 #include <objects/entrez2/Entrez2_boolean_reply.hpp>
46 #include <objects/entrez2/Entrez2_db_id.hpp>
47 #include <objects/entrez2/Entrez2_eval_boolean.hpp>
48 #include <objects/entrez2/Entrez2_get_links.hpp>
49 #include <objects/entrez2/Entrez2_hier_query.hpp>
50 #include <objects/entrez2/Entrez2_id.hpp>
51 #include <objects/entrez2/Entrez2_id_list.hpp>
52 #include <objects/entrez2/Entrez2_limits.hpp>
53 #include <objects/entrez2/Entrez2_link_id.hpp>
54 #include <objects/entrez2/Entrez2_term_pos.hpp>
55 #include <objects/entrez2/Entrez2_term_query.hpp>
56 
57 #include <algorithm>
58 
59 // generated classes
60 
61 BEGIN_NCBI_SCOPE
62 
63 BEGIN_objects_SCOPE // namespace ncbi::objects::
64 
65 // destructor
~CEntrez2Client(void)66 CEntrez2Client::~CEntrez2Client(void)
67 {
68 }
69 
70 
71 /// A simplified interface for getting neighbors (links)
72 
73 /// This form just yields a vector of UIDs
GetNeighbors(TUid query_uid,const string & db,const string & link_type,vector<TUid> & neighbor_uids)74 void CEntrez2Client::GetNeighbors(TUid query_uid,
75                                   const string& db,
76                                   const string& link_type,
77                                   vector<TUid>& neighbor_uids)
78 {
79     vector<TUid> uids;
80     uids.push_back(query_uid);
81     GetNeighbors(uids, db, link_type, neighbor_uids);
82 }
83 
84 
85 /// This form just yields a vector of UIDs
GetNeighbors(const vector<TUid> & query_uids,const string & db,const string & link_type,vector<TUid> & neighbor_uids)86 void CEntrez2Client::GetNeighbors(const vector<TUid>& query_uids,
87                                   const string& db,
88                                   const string& link_type,
89                                   vector<TUid>& neighbor_uids)
90 {
91     // first retrieve the link_set
92     CRef<CEntrez2_link_set> link_set;
93     link_set = GetNeighbors(query_uids, db, link_type);
94 
95     // then extract the UIDs
96     CEntrez2_id_list::TConstUidIterator it
97         = link_set->GetIds().GetConstUidIterator();
98     if (link_set->GetIds().IsSetNum()) {
99         neighbor_uids.reserve(link_set->GetIds().GetNum());
100     }
101     for ( ;  !it.AtEnd();  ++it) {
102         neighbor_uids.push_back(*it);
103     }
104 }
105 
106 
107 /// This form returns the entire CEntrez2_link_set object,
108 /// which includes scores.
GetNeighbors(TUid query_uid,const string & db,const string & link_type)109 CRef<CEntrez2_link_set> CEntrez2Client::GetNeighbors(TUid query_uid,
110                                                      const string& db,
111                                                      const string& link_type)
112 {
113     vector<TUid> uids;
114     uids.push_back(query_uid);
115     return GetNeighbors(uids, db, link_type);
116 }
117 
118 
119 /// This form returns the entire CEntrez2_link_set object,
120 /// which includes scores.
121 CRef<CEntrez2_link_set>
GetNeighbors(const vector<TUid> & query_uids,const string & db,const string & link_type)122 CEntrez2Client::GetNeighbors(const vector<TUid>& query_uids,
123                              const string& db,
124                              const string& link_type)
125 {
126     CEntrez2_id_list uids;
127     uids.SetDb() = CEntrez2_db_id(db);
128     uids.AssignUids(query_uids);
129 
130     CEntrez2_get_links gl;
131     gl.SetUids(uids);
132     gl.SetLinktype().Set(db + "_" + link_type);
133 
134     CRef<CEntrez2_link_set> link_set = AskGet_links(gl);
135     return link_set;
136 }
137 
138 
139 /// Retrieve counts of the various types of neighbors available
140 CRef<CEntrez2_link_count_list>
GetNeighborCounts(TUid query_uid,const string & db)141 CEntrez2Client::GetNeighborCounts(TUid query_uid,
142                                   const string& db)
143 {
144     CEntrez2_id uid;
145     uid.SetDb() = CEntrez2_db_id(db);
146     uid.SetUid(ENTREZ_ID_FROM(TIntId, query_uid));
147     return AskGet_link_counts(uid);
148 }
149 
150 
151 /// Query a db with a string, returning uids as integers
Query(const string & query,const string & db,vector<TUid> & result_uids,size_t start,size_t count,TReply * reply)152 void CEntrez2Client::Query(const string& query,
153                            const string& db,
154                            vector<TUid>& result_uids,
155                            size_t start,
156                            size_t count,
157                            TReply* reply)
158 {
159     CRef<CEntrez2_boolean_element> bel(new CEntrez2_boolean_element);
160     bel->SetStr(query);
161 
162     CEntrez2_boolean_exp bexp;
163     bexp.SetDb().Set(db);
164     bexp.SetExp().push_back(bel);
165 
166     // set some limits
167     if (start > 0) {
168         bexp.SetLimits().SetOffset_UIDs(start);
169     }
170     if (count > 0) {
171         bexp.SetLimits().SetMax_UIDs(count);
172     }
173 
174     CEntrez2_eval_boolean req;
175     req.SetReturn_UIDs(true);
176     req.SetQuery(bexp);
177 
178     CRef<CEntrez2_boolean_reply> bool_reply = AskEval_boolean(req, reply);
179 
180     // now extract the UIDs
181     if (!bool_reply->GetUids().CanGetUids()) {
182         // this happens when no matches were found
183         return;
184     }
185     for (CEntrez2_id_list::TConstUidIterator it
186              = bool_reply->GetUids().GetConstUidIterator();
187          !it.AtEnd();  ++it) {
188         result_uids.push_back(*it);
189     }
190 }
191 
192 
193 /// Given some uids, a database, and an entrez query string,
194 /// determine which of these uids match the query string.
195 ///
196 /// Note: If a uid appears more than once in query_uids and
197 /// matches the query string, it may or may not appear more
198 /// more than once in the result.
FilterIds(const vector<TUid> & query_uids,const string & db,const string & query_string,vector<TUid> & result_uids)199 void CEntrez2Client::FilterIds(const vector<TUid>& query_uids,
200                                const string& db,
201                                const string& query_string,
202                                vector<TUid>& result_uids)
203 {
204     const unsigned int kMaxIdsInQueryString = 2500;
205 
206     if (query_uids.empty()) {
207         return;
208     }
209 
210     if (query_uids.size() <= kMaxIdsInQueryString) {
211         // Query with a big query string that includes
212         // all the query_uids OR'd together
213         string uids;
214         ITERATE (vector<TUid>, uid, query_uids) {
215             if ( !uids.empty() ) {
216                 uids += " OR ";
217             }
218             uids += NStr::NumericToString(*uid) + "[UID]";
219         }
220 
221         string whole_query = "(" + query_string + ") AND (" + uids + ")";
222         Query(whole_query, db, result_uids);
223     } else {
224         // Break query_uids into chunks <= kMaxIdsInQueryString
225         vector<TUid> subset_query_uids;
226         subset_query_uids.reserve(kMaxIdsInQueryString);
227         for (size_t start = 0;  start < query_uids.size();
228              start += kMaxIdsInQueryString) {
229             subset_query_uids.clear();
230             // end is one past the last index of interest
231             size_t end =
232                 min(start + kMaxIdsInQueryString, query_uids.size());
233             for (size_t i = start;  i < end;  ++i) {
234                 subset_query_uids.push_back(query_uids[i]);
235             }
236             // This relies on the fact that FilterIds appends to result
237             FilterIds(subset_query_uids, db, query_string, result_uids);
238         }
239     }
240 }
241 
242 
243 CRef<CEntrez2_docsum_list>
GetDocsums(const vector<TUid> & uids,const string & db)244 CEntrez2Client::GetDocsums(const vector<TUid>& uids,
245                            const string& db)
246 {
247     CEntrez2_id_list ids;
248     ids.AssignUids(uids);
249     ids.SetDb().Set(db);
250     return AskGet_docsum(ids);
251 }
252 
253 
254 /// Retrieve the docsums for a single UID
255 CRef<CEntrez2_docsum_list>
GetDocsums(TUid uid,const string & db)256 CEntrez2Client::GetDocsums(TUid uid, const string& db)
257 {
258     vector<TUid> uids;
259     uids.push_back(uid);
260     return GetDocsums(uids, db);
261 }
262 
263 
264 string
GetAffinity(const CEntrez2_request & request) const265 CEntrez2Client::GetAffinity(const CEntrez2_request& request) const
266 {
267     const CE2Request&     e2req = request.GetRequest();
268     const CEntrez2_db_id* db    = 0;
269 
270     switch (e2req.Which()) {
271     case CE2Request::e_Eval_boolean:
272         db = &e2req.GetEval_boolean().GetQuery().GetDb();
273         break;
274     case CE2Request::e_Get_docsum:
275         db = &e2req.GetGet_docsum().GetDb();
276         break;
277     case CE2Request::e_Get_term_pos:
278         db = &e2req.GetGet_term_pos().GetDb();
279         break;
280     case CE2Request::e_Get_term_list:
281         db = &e2req.GetGet_term_list().GetDb();
282         break;
283     case CE2Request::e_Get_term_hierarchy:
284         db = &e2req.GetGet_term_hierarchy().GetDb();
285         break;
286     case CE2Request::e_Get_links:
287         db = &e2req.GetGet_links().GetUids().GetDb();
288         break;
289     case CE2Request::e_Get_linked:
290         db = &e2req.GetGet_linked().GetUids().GetDb();
291         break;
292     case CE2Request::e_Get_link_counts:
293         db = &e2req.GetGet_link_counts().GetDb();
294         break;
295     default:
296         break;
297     }
298 
299     if (db  &&  !db->Get().empty()) {
300         return "DB=" + db->Get();
301     } else {
302         return kEmptyStr;
303     }
304 }
305 
306 
307 #ifdef NCBI_STRICT_GI
308 
GetNeighbors(TGi query_uid,const string & db_from,const string & db_to,vector<TGi> & neighbor_uids)309 void CEntrez2Client::GetNeighbors(TGi query_uid,
310                                   const string& db_from,
311                                   const string& db_to,
312                                   vector<TGi>& neighbor_uids)
313 {
314     vector<TUid> vi_neighbor_uids;
315     GetNeighbors(GI_TO(TUid, query_uid), db_from, db_to, vi_neighbor_uids);
316     ITERATE(vector<TUid>, it, vi_neighbor_uids) {
317         neighbor_uids.push_back(GI_FROM(TUid, *it));
318     }
319 }
320 
GetNeighbors(const vector<TGi> & query_uids,const string & db,const string & link_type,vector<TGi> & neighbor_uids)321 void CEntrez2Client::GetNeighbors(const vector<TGi>& query_uids,
322                                   const string& db,
323                                   const string& link_type,
324                                   vector<TGi>& neighbor_uids)
325 {
326     vector<TUid> vi_query_uids;
327     ITERATE(vector<TGi>, it, query_uids) {
328         vi_query_uids.push_back(GI_TO(TUid, *it));
329     }
330     vector<TUid> vi_neighbor_uids;
331     GetNeighbors(vi_query_uids, db, link_type, vi_neighbor_uids);
332     ITERATE(vector<TUid>, it, vi_neighbor_uids) {
333         neighbor_uids.push_back(GI_FROM(TUid, *it));
334     }
335 }
336 
GetNeighbors(TGi query_uid,const string & db_from,const string & db_to)337 CRef<CEntrez2_link_set> CEntrez2Client::GetNeighbors(TGi query_uid,
338                                                      const string& db_from,
339                                                      const string& db_to)
340 {
341     return GetNeighbors(GI_TO(TUid, query_uid), db_from, db_to);
342 }
343 
GetNeighbors(const vector<TGi> & query_uids,const string & db_from,const string & db_to)344 CRef<CEntrez2_link_set> CEntrez2Client::GetNeighbors(const vector<TGi>& query_uids,
345                                                      const string& db_from,
346                                                      const string& db_to)
347 {
348     vector<TUid> vi_query_uids;
349     ITERATE(vector<TGi>, it, query_uids) {
350         vi_query_uids.push_back(GI_TO(TUid, *it));
351     }
352     return GetNeighbors(vi_query_uids, db_from, db_to);
353 }
354 
GetNeighborCounts(TGi query_uid,const string & db)355 CRef<CEntrez2_link_count_list> CEntrez2Client::GetNeighborCounts(TGi query_uid,
356                                                                  const string& db)
357 {
358     return GetNeighborCounts(GI_TO(TUid, query_uid), db);
359 }
360 
Query(const string & query,const string & db,vector<TGi> & result_uids,size_t start_offs,size_t count,TReply * reply)361 void CEntrez2Client::Query(const string& query,
362                            const string& db,
363                            vector<TGi>& result_uids,
364                            size_t start_offs,
365                            size_t count,
366                            TReply* reply)
367 {
368     vector<TUid> vi_result_uids;
369     Query(query, db, vi_result_uids, start_offs, count, reply);
370     ITERATE(vector<TUid>, it, vi_result_uids) {
371         result_uids.push_back(GI_FROM(TUid, *it));
372     }
373 }
374 
FilterIds(const vector<TGi> & query_uids,const string & db,const string & query_string,vector<TGi> & result_uids)375 void CEntrez2Client::FilterIds(const vector<TGi>& query_uids,
376                                const string& db,
377                                const string& query_string,
378                                vector<TGi>& result_uids)
379 {
380     vector<TUid> vi_query_uids;
381     ITERATE(vector<TGi>, it, query_uids) {
382         vi_query_uids.push_back(GI_TO(TUid, *it));
383     }
384     vector<TUid> vi_result_uids;
385     FilterIds(vi_query_uids, db, query_string, vi_result_uids);
386     ITERATE(vector<TUid>, it, vi_result_uids) {
387         result_uids.push_back(GI_FROM(TUid, *it));
388     }
389 }
390 
GetDocsums(const vector<TGi> & uids,const string & db)391 CRef<CEntrez2_docsum_list> CEntrez2Client::GetDocsums(const vector<TGi>& uids,
392                                                       const string& db)
393 {
394     vector<TUid> vi_uids;
395     ITERATE(vector<TGi>, it, uids) {
396         vi_uids.push_back(GI_TO(TUid, *it));
397     }
398     return GetDocsums(vi_uids, db);
399 }
400 
GetDocsums(TGi uid,const string & db)401 CRef<CEntrez2_docsum_list> CEntrez2Client::GetDocsums(TGi uid,
402                                                       const string& db)
403 {
404     return GetDocsums(GI_TO(TUid, uid), db);
405 }
406 
407 #endif
408 
409 END_objects_SCOPE // namespace ncbi::objects::
410 
411 END_NCBI_SCOPE
412 
413 /* Original file checksum: lines: 64, chars: 1896, CRC32: cd6a8df4 */
414