1 /* $Id: entrez2_client.cpp 607107 2020-04-30 12:37:23Z grichenk $
2 * ===========================================================================
3 *
4 * PUBLIC DOMAIN NOTICE
5 * National Center for Biotechnology Information
6 *
7 * This software/database is a "United States Government Work" under the
8 * terms of the United States Copyright Act. It was written as part of
9 * the author's official duties as a United States Government employee and
10 * thus cannot be copyrighted. This software/database is freely available
11 * to the public for use. The National Library of Medicine and the U.S.
12 * Government have not placed any restriction on its use or reproduction.
13 *
14 * Although all reasonable efforts have been taken to ensure the accuracy
15 * and reliability of the software and data, the NLM and the U.S.
16 * Government do not and cannot warrant the performance or results that
17 * may be obtained by using this software or data. The NLM and the U.S.
18 * Government disclaim all warranties, express or implied, including
19 * warranties of performance, merchantability or fitness for any particular
20 * purpose.
21 *
22 * Please cite the author in any work or product based on this material.
23 *
24 * ===========================================================================
25 *
26 * Author: Josh Cherry
27 *
28 * File Description:
29 * Entrez2 network client
30 *
31 * Remark:
32 * This code was originally generated by application DATATOOL
33 * using specifications from the data definition file
34 * 'entrez2.asn'.
35 */
36
37 // standard includes
38
39 // generated includes
40
41 #include <ncbi_pch.hpp>
42 #include <objects/entrez2/entrez2_client.hpp>
43 #include <objects/entrez2/Entrez2_boolean_element.hpp>
44 #include <objects/entrez2/Entrez2_boolean_exp.hpp>
45 #include <objects/entrez2/Entrez2_boolean_reply.hpp>
46 #include <objects/entrez2/Entrez2_db_id.hpp>
47 #include <objects/entrez2/Entrez2_eval_boolean.hpp>
48 #include <objects/entrez2/Entrez2_get_links.hpp>
49 #include <objects/entrez2/Entrez2_hier_query.hpp>
50 #include <objects/entrez2/Entrez2_id.hpp>
51 #include <objects/entrez2/Entrez2_id_list.hpp>
52 #include <objects/entrez2/Entrez2_limits.hpp>
53 #include <objects/entrez2/Entrez2_link_id.hpp>
54 #include <objects/entrez2/Entrez2_term_pos.hpp>
55 #include <objects/entrez2/Entrez2_term_query.hpp>
56
57 #include <algorithm>
58
59 // generated classes
60
61 BEGIN_NCBI_SCOPE
62
63 BEGIN_objects_SCOPE // namespace ncbi::objects::
64
65 // destructor
~CEntrez2Client(void)66 CEntrez2Client::~CEntrez2Client(void)
67 {
68 }
69
70
71 /// A simplified interface for getting neighbors (links)
72
73 /// This form just yields a vector of UIDs
GetNeighbors(TUid query_uid,const string & db,const string & link_type,vector<TUid> & neighbor_uids)74 void CEntrez2Client::GetNeighbors(TUid query_uid,
75 const string& db,
76 const string& link_type,
77 vector<TUid>& neighbor_uids)
78 {
79 vector<TUid> uids;
80 uids.push_back(query_uid);
81 GetNeighbors(uids, db, link_type, neighbor_uids);
82 }
83
84
85 /// This form just yields a vector of UIDs
GetNeighbors(const vector<TUid> & query_uids,const string & db,const string & link_type,vector<TUid> & neighbor_uids)86 void CEntrez2Client::GetNeighbors(const vector<TUid>& query_uids,
87 const string& db,
88 const string& link_type,
89 vector<TUid>& neighbor_uids)
90 {
91 // first retrieve the link_set
92 CRef<CEntrez2_link_set> link_set;
93 link_set = GetNeighbors(query_uids, db, link_type);
94
95 // then extract the UIDs
96 CEntrez2_id_list::TConstUidIterator it
97 = link_set->GetIds().GetConstUidIterator();
98 if (link_set->GetIds().IsSetNum()) {
99 neighbor_uids.reserve(link_set->GetIds().GetNum());
100 }
101 for ( ; !it.AtEnd(); ++it) {
102 neighbor_uids.push_back(*it);
103 }
104 }
105
106
107 /// This form returns the entire CEntrez2_link_set object,
108 /// which includes scores.
GetNeighbors(TUid query_uid,const string & db,const string & link_type)109 CRef<CEntrez2_link_set> CEntrez2Client::GetNeighbors(TUid query_uid,
110 const string& db,
111 const string& link_type)
112 {
113 vector<TUid> uids;
114 uids.push_back(query_uid);
115 return GetNeighbors(uids, db, link_type);
116 }
117
118
119 /// This form returns the entire CEntrez2_link_set object,
120 /// which includes scores.
121 CRef<CEntrez2_link_set>
GetNeighbors(const vector<TUid> & query_uids,const string & db,const string & link_type)122 CEntrez2Client::GetNeighbors(const vector<TUid>& query_uids,
123 const string& db,
124 const string& link_type)
125 {
126 CEntrez2_id_list uids;
127 uids.SetDb() = CEntrez2_db_id(db);
128 uids.AssignUids(query_uids);
129
130 CEntrez2_get_links gl;
131 gl.SetUids(uids);
132 gl.SetLinktype().Set(db + "_" + link_type);
133
134 CRef<CEntrez2_link_set> link_set = AskGet_links(gl);
135 return link_set;
136 }
137
138
139 /// Retrieve counts of the various types of neighbors available
140 CRef<CEntrez2_link_count_list>
GetNeighborCounts(TUid query_uid,const string & db)141 CEntrez2Client::GetNeighborCounts(TUid query_uid,
142 const string& db)
143 {
144 CEntrez2_id uid;
145 uid.SetDb() = CEntrez2_db_id(db);
146 uid.SetUid(ENTREZ_ID_FROM(TIntId, query_uid));
147 return AskGet_link_counts(uid);
148 }
149
150
151 /// Query a db with a string, returning uids as integers
Query(const string & query,const string & db,vector<TUid> & result_uids,size_t start,size_t count,TReply * reply)152 void CEntrez2Client::Query(const string& query,
153 const string& db,
154 vector<TUid>& result_uids,
155 size_t start,
156 size_t count,
157 TReply* reply)
158 {
159 CRef<CEntrez2_boolean_element> bel(new CEntrez2_boolean_element);
160 bel->SetStr(query);
161
162 CEntrez2_boolean_exp bexp;
163 bexp.SetDb().Set(db);
164 bexp.SetExp().push_back(bel);
165
166 // set some limits
167 if (start > 0) {
168 bexp.SetLimits().SetOffset_UIDs(start);
169 }
170 if (count > 0) {
171 bexp.SetLimits().SetMax_UIDs(count);
172 }
173
174 CEntrez2_eval_boolean req;
175 req.SetReturn_UIDs(true);
176 req.SetQuery(bexp);
177
178 CRef<CEntrez2_boolean_reply> bool_reply = AskEval_boolean(req, reply);
179
180 // now extract the UIDs
181 if (!bool_reply->GetUids().CanGetUids()) {
182 // this happens when no matches were found
183 return;
184 }
185 for (CEntrez2_id_list::TConstUidIterator it
186 = bool_reply->GetUids().GetConstUidIterator();
187 !it.AtEnd(); ++it) {
188 result_uids.push_back(*it);
189 }
190 }
191
192
193 /// Given some uids, a database, and an entrez query string,
194 /// determine which of these uids match the query string.
195 ///
196 /// Note: If a uid appears more than once in query_uids and
197 /// matches the query string, it may or may not appear more
198 /// more than once in the result.
FilterIds(const vector<TUid> & query_uids,const string & db,const string & query_string,vector<TUid> & result_uids)199 void CEntrez2Client::FilterIds(const vector<TUid>& query_uids,
200 const string& db,
201 const string& query_string,
202 vector<TUid>& result_uids)
203 {
204 const unsigned int kMaxIdsInQueryString = 2500;
205
206 if (query_uids.empty()) {
207 return;
208 }
209
210 if (query_uids.size() <= kMaxIdsInQueryString) {
211 // Query with a big query string that includes
212 // all the query_uids OR'd together
213 string uids;
214 ITERATE (vector<TUid>, uid, query_uids) {
215 if ( !uids.empty() ) {
216 uids += " OR ";
217 }
218 uids += NStr::NumericToString(*uid) + "[UID]";
219 }
220
221 string whole_query = "(" + query_string + ") AND (" + uids + ")";
222 Query(whole_query, db, result_uids);
223 } else {
224 // Break query_uids into chunks <= kMaxIdsInQueryString
225 vector<TUid> subset_query_uids;
226 subset_query_uids.reserve(kMaxIdsInQueryString);
227 for (size_t start = 0; start < query_uids.size();
228 start += kMaxIdsInQueryString) {
229 subset_query_uids.clear();
230 // end is one past the last index of interest
231 size_t end =
232 min(start + kMaxIdsInQueryString, query_uids.size());
233 for (size_t i = start; i < end; ++i) {
234 subset_query_uids.push_back(query_uids[i]);
235 }
236 // This relies on the fact that FilterIds appends to result
237 FilterIds(subset_query_uids, db, query_string, result_uids);
238 }
239 }
240 }
241
242
243 CRef<CEntrez2_docsum_list>
GetDocsums(const vector<TUid> & uids,const string & db)244 CEntrez2Client::GetDocsums(const vector<TUid>& uids,
245 const string& db)
246 {
247 CEntrez2_id_list ids;
248 ids.AssignUids(uids);
249 ids.SetDb().Set(db);
250 return AskGet_docsum(ids);
251 }
252
253
254 /// Retrieve the docsums for a single UID
255 CRef<CEntrez2_docsum_list>
GetDocsums(TUid uid,const string & db)256 CEntrez2Client::GetDocsums(TUid uid, const string& db)
257 {
258 vector<TUid> uids;
259 uids.push_back(uid);
260 return GetDocsums(uids, db);
261 }
262
263
264 string
GetAffinity(const CEntrez2_request & request) const265 CEntrez2Client::GetAffinity(const CEntrez2_request& request) const
266 {
267 const CE2Request& e2req = request.GetRequest();
268 const CEntrez2_db_id* db = 0;
269
270 switch (e2req.Which()) {
271 case CE2Request::e_Eval_boolean:
272 db = &e2req.GetEval_boolean().GetQuery().GetDb();
273 break;
274 case CE2Request::e_Get_docsum:
275 db = &e2req.GetGet_docsum().GetDb();
276 break;
277 case CE2Request::e_Get_term_pos:
278 db = &e2req.GetGet_term_pos().GetDb();
279 break;
280 case CE2Request::e_Get_term_list:
281 db = &e2req.GetGet_term_list().GetDb();
282 break;
283 case CE2Request::e_Get_term_hierarchy:
284 db = &e2req.GetGet_term_hierarchy().GetDb();
285 break;
286 case CE2Request::e_Get_links:
287 db = &e2req.GetGet_links().GetUids().GetDb();
288 break;
289 case CE2Request::e_Get_linked:
290 db = &e2req.GetGet_linked().GetUids().GetDb();
291 break;
292 case CE2Request::e_Get_link_counts:
293 db = &e2req.GetGet_link_counts().GetDb();
294 break;
295 default:
296 break;
297 }
298
299 if (db && !db->Get().empty()) {
300 return "DB=" + db->Get();
301 } else {
302 return kEmptyStr;
303 }
304 }
305
306
307 #ifdef NCBI_STRICT_GI
308
GetNeighbors(TGi query_uid,const string & db_from,const string & db_to,vector<TGi> & neighbor_uids)309 void CEntrez2Client::GetNeighbors(TGi query_uid,
310 const string& db_from,
311 const string& db_to,
312 vector<TGi>& neighbor_uids)
313 {
314 vector<TUid> vi_neighbor_uids;
315 GetNeighbors(GI_TO(TUid, query_uid), db_from, db_to, vi_neighbor_uids);
316 ITERATE(vector<TUid>, it, vi_neighbor_uids) {
317 neighbor_uids.push_back(GI_FROM(TUid, *it));
318 }
319 }
320
GetNeighbors(const vector<TGi> & query_uids,const string & db,const string & link_type,vector<TGi> & neighbor_uids)321 void CEntrez2Client::GetNeighbors(const vector<TGi>& query_uids,
322 const string& db,
323 const string& link_type,
324 vector<TGi>& neighbor_uids)
325 {
326 vector<TUid> vi_query_uids;
327 ITERATE(vector<TGi>, it, query_uids) {
328 vi_query_uids.push_back(GI_TO(TUid, *it));
329 }
330 vector<TUid> vi_neighbor_uids;
331 GetNeighbors(vi_query_uids, db, link_type, vi_neighbor_uids);
332 ITERATE(vector<TUid>, it, vi_neighbor_uids) {
333 neighbor_uids.push_back(GI_FROM(TUid, *it));
334 }
335 }
336
GetNeighbors(TGi query_uid,const string & db_from,const string & db_to)337 CRef<CEntrez2_link_set> CEntrez2Client::GetNeighbors(TGi query_uid,
338 const string& db_from,
339 const string& db_to)
340 {
341 return GetNeighbors(GI_TO(TUid, query_uid), db_from, db_to);
342 }
343
GetNeighbors(const vector<TGi> & query_uids,const string & db_from,const string & db_to)344 CRef<CEntrez2_link_set> CEntrez2Client::GetNeighbors(const vector<TGi>& query_uids,
345 const string& db_from,
346 const string& db_to)
347 {
348 vector<TUid> vi_query_uids;
349 ITERATE(vector<TGi>, it, query_uids) {
350 vi_query_uids.push_back(GI_TO(TUid, *it));
351 }
352 return GetNeighbors(vi_query_uids, db_from, db_to);
353 }
354
GetNeighborCounts(TGi query_uid,const string & db)355 CRef<CEntrez2_link_count_list> CEntrez2Client::GetNeighborCounts(TGi query_uid,
356 const string& db)
357 {
358 return GetNeighborCounts(GI_TO(TUid, query_uid), db);
359 }
360
Query(const string & query,const string & db,vector<TGi> & result_uids,size_t start_offs,size_t count,TReply * reply)361 void CEntrez2Client::Query(const string& query,
362 const string& db,
363 vector<TGi>& result_uids,
364 size_t start_offs,
365 size_t count,
366 TReply* reply)
367 {
368 vector<TUid> vi_result_uids;
369 Query(query, db, vi_result_uids, start_offs, count, reply);
370 ITERATE(vector<TUid>, it, vi_result_uids) {
371 result_uids.push_back(GI_FROM(TUid, *it));
372 }
373 }
374
FilterIds(const vector<TGi> & query_uids,const string & db,const string & query_string,vector<TGi> & result_uids)375 void CEntrez2Client::FilterIds(const vector<TGi>& query_uids,
376 const string& db,
377 const string& query_string,
378 vector<TGi>& result_uids)
379 {
380 vector<TUid> vi_query_uids;
381 ITERATE(vector<TGi>, it, query_uids) {
382 vi_query_uids.push_back(GI_TO(TUid, *it));
383 }
384 vector<TUid> vi_result_uids;
385 FilterIds(vi_query_uids, db, query_string, vi_result_uids);
386 ITERATE(vector<TUid>, it, vi_result_uids) {
387 result_uids.push_back(GI_FROM(TUid, *it));
388 }
389 }
390
GetDocsums(const vector<TGi> & uids,const string & db)391 CRef<CEntrez2_docsum_list> CEntrez2Client::GetDocsums(const vector<TGi>& uids,
392 const string& db)
393 {
394 vector<TUid> vi_uids;
395 ITERATE(vector<TGi>, it, uids) {
396 vi_uids.push_back(GI_TO(TUid, *it));
397 }
398 return GetDocsums(vi_uids, db);
399 }
400
GetDocsums(TGi uid,const string & db)401 CRef<CEntrez2_docsum_list> CEntrez2Client::GetDocsums(TGi uid,
402 const string& db)
403 {
404 return GetDocsums(GI_TO(TUid, uid), db);
405 }
406
407 #endif
408
409 END_objects_SCOPE // namespace ncbi::objects::
410
411 END_NCBI_SCOPE
412
413 /* Original file checksum: lines: 64, chars: 1896, CRC32: cd6a8df4 */
414