1 #ifndef PUBSEQ_GATEWAY__HPP
2 #define PUBSEQ_GATEWAY__HPP
3 
4 /*  $Id: pubseq_gateway.hpp 629837 2021-04-22 12:47:49Z ivanov $
5  * ===========================================================================
6  *
7  *                            PUBLIC DOMAIN NOTICE
8  *               National Center for Biotechnology Information
9  *
10  *  This software/database is a "United States Government Work" under the
11  *  terms of the United States Copyright Act.  It was written as part of
12  *  the author's official duties as a United States Government employee and
13  *  thus cannot be copyrighted.  This software/database is freely available
14  *  to the public for use. The National Library of Medicine and the U.S.
15  *  Government have not placed any restriction on its use or reproduction.
16  *
17  *  Although all reasonable efforts have been taken to ensure the accuracy
18  *  and reliability of the software and data, the NLM and the U.S.
19  *  Government do not and cannot warrant the performance or results that
20  *  may be obtained by using this software or data. The NLM and the U.S.
21  *  Government disclaim all warranties, express or implied, including
22  *  warranties of performance, merchantability or fitness for any particular
23  *  purpose.
24  *
25  *  Please cite the author in any work or product based on this material.
26  *
27  * ===========================================================================
28  *
29  * Authors: Dmitri Dmitrienko
30  *
31  * File Description:
32  *
33  */
34 #include <string>
35 
36 #include <corelib/ncbiapp.hpp>
37 #include <corelib/ncbi_system.hpp>
38 
39 #include <objtools/pubseq_gateway/impl/cassandra/cass_blob_op.hpp>
40 #include <objtools/pubseq_gateway/impl/cassandra/cass_factory.hpp>
41 #include <objtools/pubseq_gateway/cache/psg_cache.hpp>
42 #include <objtools/pubseq_gateway/impl/cassandra/messages.hpp>
43 
44 #include "pending_operation.hpp"
45 #include "http_server_transport.hpp"
46 #include "pubseq_gateway_version.hpp"
47 #include "pubseq_gateway_stat.hpp"
48 #include "pubseq_gateway_utils.hpp"
49 #include "pubseq_gateway_types.hpp"
50 #include "exclude_blob_cache.hpp"
51 #include "alerts.hpp"
52 #include "timing.hpp"
53 #include "psgs_dispatcher.hpp"
54 #include "osg_connection.hpp"
55 
56 
57 USING_NCBI_SCOPE;
58 
59 
// Upper bound of 1,000,000,000 used by the test I/O facility.
// NOTE(review): presumably the largest data size the OnTestIO() handler
// accepts -- confirm against the implementation file.
const long  kMaxTestIOSize = 1000L * 1000L * 1000L;
61 
62 
63 // Cassandra mapping needs to be switched atomically so it is first read into
64 // the corresponding structure and then switched.
// The structure holds one complete snapshot of the mapping: the name of the
// resolver (bioseq) keyspace and the list of bioseq named annotation
// keyspaces.
struct SCassMapping
{
    // Keyspace where the SI2CSI and BIOSEQ_INFO tables reside
    string                              m_BioseqKeyspace;

    // Named annotation keyspaces; each name is paired with an int32_t value
    // NOTE(review): the integer is presumably the sat id -- confirm against
    // the code which populates the mapping.
    vector<pair<string, int32_t>>       m_BioseqNAKeyspaces;

    // Two mappings are equal when both the bioseq keyspace and the list of
    // named annotation keyspaces match.
    bool operator==(const SCassMapping &  rhs) const
    {
        if (m_BioseqKeyspace != rhs.m_BioseqKeyspace)
            return false;
        return m_BioseqNAKeyspaces == rhs.m_BioseqNAKeyspaces;
    }

    bool operator!=(const SCassMapping &  rhs) const
    {
        return !(*this == rhs);
    }

    // Checks that the mapping is populated.
    // root_keyspace is used only to build the error messages.
    // Returns a list of human readable error messages; the list is empty
    // if both the bioseq keyspace and the NA keyspaces are present.
    vector<string> validate(const string &  root_keyspace) const
    {
        vector<string>      problems;

        if (m_BioseqKeyspace.empty()) {
            string      msg = "Cannot find the resolver keyspace (where SI2CSI "
                              "and BIOSEQ_INFO tables reside) in the ";
            msg += root_keyspace;
            msg += ".SAT2KEYSPACE table.";
            problems.push_back(msg);
        }

        if (m_BioseqNAKeyspaces.empty()) {
            string      msg = "No bioseq named annotation keyspaces found "
                              "in the ";
            msg += root_keyspace;
            msg += " keyspace.";
            problems.push_back(msg);
        }

        return problems;
    }

    // Resets the mapping to the empty state
    void clear(void)
    {
        m_BioseqNAKeyspaces.clear();
        m_BioseqKeyspace.clear();
    }
};
101 
102 
103 class CPubseqGatewayApp: public CNcbiApplication
104 {
105 public:
106     CPubseqGatewayApp();
107     ~CPubseqGatewayApp();
108 
109     virtual void Init(void);
110     void ParseArgs(void);
111     void OpenCache(void);
112     bool OpenCass(void);
113     bool PopulateCassandraMapping(bool  need_accept_alert=false);
114     void PopulatePublicCommentsMapping(void);
115     void CheckCassMapping(void);
116     void CloseCass(void);
117     bool SatToKeyspace(int  sat, string &  sat_name);
118 
GetBioseqKeyspace(void) const119     string GetBioseqKeyspace(void) const
120     {
121         return m_CassMapping[m_MappingIndex].m_BioseqKeyspace;
122     }
123 
GetBioseqNAKeyspaces(void) const124     vector<pair<string, int32_t>> GetBioseqNAKeyspaces(void) const
125     {
126         return m_CassMapping[m_MappingIndex].m_BioseqNAKeyspaces;
127     }
128 
GetLookupCache(void)129     CPubseqGatewayCache *  GetLookupCache(void)
130     {
131         return m_LookupCache.get();
132     }
133 
GetExcludeBlobCache(void)134     CExcludeBlobCache *  GetExcludeBlobCache(void)
135     {
136         return m_ExcludeBlobCache.get();
137     }
138 
GetPublicCommentsMapping(void)139     CPSGMessages *  GetPublicCommentsMapping(void)
140     {
141         return m_PublicComments.get();
142     }
143 
GetSlimMaxBlobSize(void) const144     unsigned long GetSlimMaxBlobSize(void) const
145     {
146         return m_SlimMaxBlobSize;
147     }
148 
149     int OnBadURL(CHttpRequest &  req, shared_ptr<CPSGS_Reply>  reply);
150     int OnGet(CHttpRequest &  req, shared_ptr<CPSGS_Reply>  reply);
151     int OnGetBlob(CHttpRequest &  req, shared_ptr<CPSGS_Reply>  reply);
152     int OnResolve(CHttpRequest &  req, shared_ptr<CPSGS_Reply>  reply);
153     int OnGetTSEChunk(CHttpRequest &  req, shared_ptr<CPSGS_Reply>  reply);
154     int OnGetNA(CHttpRequest &  req, shared_ptr<CPSGS_Reply>  reply);
155     int OnHealth(CHttpRequest &  req, shared_ptr<CPSGS_Reply>  reply);
156     int OnConfig(CHttpRequest &  req, shared_ptr<CPSGS_Reply>  reply);
157     int OnInfo(CHttpRequest &  req, shared_ptr<CPSGS_Reply>  reply);
158     int OnStatus(CHttpRequest &  req, shared_ptr<CPSGS_Reply>  reply);
159     int OnShutdown(CHttpRequest &  req, shared_ptr<CPSGS_Reply>  reply);
160     int OnGetAlerts(CHttpRequest &  req, shared_ptr<CPSGS_Reply>  reply);
161     int OnAckAlert(CHttpRequest &  req, shared_ptr<CPSGS_Reply>  reply);
162     int OnStatistics(CHttpRequest &  req, shared_ptr<CPSGS_Reply>  reply);
163     int OnTestIO(CHttpRequest &  req, shared_ptr<CPSGS_Reply>  reply);
164 
165     virtual int Run(void);
166 
167     static CPubseqGatewayApp *  GetInstance(void);
168 
GetTiming(void)169     COperationTiming & GetTiming(void)
170     {
171         return *m_Timing.get();
172     }
173 
GetStartupDataState(void) const174     EPSGS_StartupDataState GetStartupDataState(void) const
175     {
176         return m_StartupDataState;
177     }
178 
GetIdToNameAndDescriptionMap(void) const179     map<string, tuple<string, string>>  GetIdToNameAndDescriptionMap(void) const
180     {
181         return m_IdToNameAndDescription;
182     }
183 
GetCassandraConnection(void)184     shared_ptr<CCassConnection> GetCassandraConnection(void)
185     {
186         return m_CassConnection;
187     }
188 
GetCassandraTimeout(void) const189     unsigned int GetCassandraTimeout(void) const
190     {
191         return m_TimeoutMs;
192     }
193 
GetCassandraMaxRetries(void) const194     unsigned int GetCassandraMaxRetries(void) const
195     {
196         return m_MaxRetries;
197     }
198 
GetOSGProcessorsEnabled() const199     bool GetOSGProcessorsEnabled() const
200     {
201         return m_OSGProcessorsEnabled;
202     }
203 
GetCDDProcessorsEnabled() const204     bool GetCDDProcessorsEnabled() const
205     {
206         return m_CDDProcessorsEnabled;
207     }
208 
GetOSGConnectionPool() const209     const CRef<psg::osg::COSGConnectionPool>& GetOSGConnectionPool() const
210     {
211         return m_OSGConnectionPool;
212     }
213 
GetCassandraProcessorsEnabled(void) const214     bool GetCassandraProcessorsEnabled(void) const
215     {
216         return m_CassandraProcessorsEnabled;
217     }
218 
219     IPSGS_Processor::EPSGS_StartProcessing
SignalStartProcessing(IPSGS_Processor * processor)220     SignalStartProcessing(IPSGS_Processor *  processor)
221     {
222         return m_RequestDispatcher.SignalStartProcessing(processor);
223     }
224 
SignalFinishProcessing(IPSGS_Processor * processor)225     void SignalFinishProcessing(IPSGS_Processor *  processor)
226     {
227         m_RequestDispatcher.SignalFinishProcessing(processor);
228     }
229 
SignalConnectionCancelled(IPSGS_Processor * processor)230     void SignalConnectionCancelled(IPSGS_Processor *  processor)
231     {
232         m_RequestDispatcher.SignalConnectionCancelled(processor);
233     }
234 
GetSSLEnable(void) const235     bool GetSSLEnable(void) const
236     {
237         return m_SSLEnable;
238     }
239 
GetSSLCertFile(void) const240     string GetSSLCertFile(void) const
241     {
242         return m_SSLCertFile;
243     }
244 
GetSSLKeyFile(void) const245     string GetSSLKeyFile(void) const
246     {
247         return m_SSLKeyFile;
248     }
249 
GetSSLCiphers(void) const250     string GetSSLCiphers(void) const
251     {
252         return m_SSLCiphers;
253     }
254 
GetAlerts(void)255     CPSGAlerts &  GetAlerts(void)
256     {
257         return m_Alerts;
258     }
259 
GetCounters(void)260     CPSGSCounters &  GetCounters(void)
261     {
262         return m_Counters;
263     }
264 
265 private:
266     struct SRequestParameter
267     {
268         bool            m_Found;
269         CTempString     m_Value;
270 
SRequestParameterCPubseqGatewayApp::SRequestParameter271         SRequestParameter() : m_Found(false)
272         {}
273     };
274 
275     void x_SendMessageAndCompletionChunks(
276         shared_ptr<CPSGS_Reply>  reply,
277         const string &  message,
278         CRequestStatus::ECode  status, int  code, EDiagSev  severity);
279 
280     bool x_ProcessCommonGetAndResolveParams(
281         CHttpRequest &  req,
282         shared_ptr<CPSGS_Reply>  reply,
283         CTempString &  seq_id, int &  seq_id_type,
284         SPSGS_RequestBase::EPSGS_CacheAndDbUse &  use_cache);
285 
286 private:
287     void x_ValidateArgs(void);
288     string  x_GetCmdLineArguments(void) const;
289     CRef<CRequestContext>  x_CreateRequestContext(CHttpRequest &  req) const;
290     void x_PrintRequestStop(CRef<CRequestContext>  context, int  status);
291 
292     SRequestParameter  x_GetParam(CHttpRequest &  req,
293                                   const string &  name) const;
294     SPSGS_RequestBase::EPSGS_CacheAndDbUse x_GetUseCacheParameter(
295                                                 CHttpRequest &  req,
296                                                 string &  err_msg);
297     bool x_IsBoolParamValid(const string &  param_name,
298                             const CTempString &  param_value,
299                             string &  err_msg) const;
300     bool x_ConvertIntParameter(const string &  param_name,
301                                const CTempString &  param_value,
302                                int &  converted,
303                                string &  err_msg) const;
304     bool x_IsResolutionParamValid(const string &  param_name,
305                                   const CTempString &  param_value,
306                                   string &  err_msg) const;
307     SPSGS_ResolveRequest::EPSGS_OutputFormat x_GetOutputFormat(
308                                     const string &  param_name,
309                                     const CTempString &  param_value,
310                                     string &  err_msg) const;
311     SPSGS_BlobRequestBase::EPSGS_TSEOption x_GetTSEOption(
312                               const string &  param_name,
313                               const CTempString &  param_value,
314                               string &  err_msg) const;
315     vector<string> x_GetExcludeBlobs(const string &  param_name,
316                                      const CTempString &  param_value) const;
317     unsigned long x_GetDataSize(const IRegistry &  reg,
318                                 const string &  section,
319                                 const string &  entry,
320                                 unsigned long  default_val);
321     SPSGS_RequestBase::EPSGS_AccSubstitutioOption x_GetAccessionSubstitutionOption(
322                                             const string &  param_name,
323                                             const CTempString &  param_value,
324                                             string &  err_msg) const;
325     bool x_GetTraceParameter(CHttpRequest &  req,
326                              const string &  param_name,
327                              SPSGS_RequestBase::EPSGS_Trace &  trace,
328                              string &  err_msg);
329     bool x_GetHops(CHttpRequest &  req,
330                    shared_ptr<CPSGS_Reply>  reply, int &  hops);
331     bool x_GetEnabledAndDisabledProcessors(CHttpRequest &  req,
332                                            shared_ptr<CPSGS_Reply>  reply,
333                                            vector<string> &  enabled_processors,
334                                            vector<string> &  disabled_processors);
335 
336 private:
337     void x_InsufficientArguments(shared_ptr<CPSGS_Reply>  reply,
338                                  CRef<CRequestContext> &  context,
339                                  const string &  err_msg);
340     void x_MalformedArguments(shared_ptr<CPSGS_Reply>  reply,
341                               CRef<CRequestContext> &  context,
342                               const string &  err_msg);
343     bool x_IsShuttingDown(shared_ptr<CPSGS_Reply>  reply);
344     void x_ReadIdToNameAndDescriptionConfiguration(const IRegistry &  reg,
345                                                    const string &  section);
346     void x_RegisterProcessors(void);
347     void x_DispatchRequest(shared_ptr<CPSGS_Request>  request,
348                            shared_ptr<CPSGS_Reply>  reply);
349     void x_InitSSL(void);
350 
351 private:
352     string                              m_Si2csiDbFile;
353     string                              m_BioseqInfoDbFile;
354     string                              m_BlobPropDbFile;
355 
356     // Bioseq and named annotations keyspaces can be updated dynamically.
357     // The index controls the active set.
358     // The sat names cannot be updated dynamically - they are read once.
359     size_t                              m_MappingIndex;
360     SCassMapping                        m_CassMapping[2];
361     vector<string>                      m_SatNames;
362     unique_ptr<CPSGMessages>            m_PublicComments;
363 
364     unsigned short                      m_HttpPort;
365     unsigned short                      m_HttpWorkers;
366     unsigned int                        m_ListenerBacklog;
367     unsigned short                      m_TcpMaxConn;
368 
369     shared_ptr<CCassConnection>         m_CassConnection;
370     shared_ptr<CCassConnectionFactory>  m_CassConnectionFactory;
371     unsigned int                        m_TimeoutMs;
372     unsigned int                        m_MaxRetries;
373 
374     unsigned int                        m_ExcludeCacheMaxSize;
375     unsigned int                        m_ExcludeCachePurgePercentage;
376     unsigned int                        m_ExcludeCacheInactivityPurge;
377     unsigned long                       m_SmallBlobSize;
378     unsigned long                       m_MinStatValue;
379     unsigned long                       m_MaxStatValue;
380     unsigned long                       m_NStatBins;
381     string                              m_StatScaleType;
382     unsigned long                       m_TickSpan;
383 
384     CTime                               m_StartTime;
385     string                              m_RootKeyspace;
386     string                              m_AuthToken;
387 
388     bool                                m_AllowIOTest;
389     unique_ptr<char []>                 m_IOTestBuffer;
390 
391     unsigned long                       m_SlimMaxBlobSize;
392     int                                 m_MaxHops;
393 
394     bool                                m_CassandraProcessorsEnabled;
395     string                              m_TestSeqId;
396     bool                                m_TestSeqIdIgnoreError;
397 
398     unique_ptr<CPubseqGatewayCache>     m_LookupCache;
399     unique_ptr<CHttpDaemon<CPendingOperation>>
400                                         m_TcpDaemon;
401 
402     unique_ptr<CExcludeBlobCache>       m_ExcludeBlobCache;
403 
404     CPSGAlerts                          m_Alerts;
405     unique_ptr<COperationTiming>        m_Timing;
406     CPSGSCounters                       m_Counters;
407 
408     EPSGS_StartupDataState              m_StartupDataState;
409     CNcbiLogFields                      m_LogFields;
410 
411     // Serialized JSON introspection message
412     string                              m_HelpMessage;
413 
414     // Configured counter/statistics ID to name/description
415     map<string, tuple<string, string>>  m_IdToNameAndDescription;
416 
417     bool                                m_OSGProcessorsEnabled;
418     CRef<psg::osg::COSGConnectionPool>  m_OSGConnectionPool;
419 
420     bool                                m_CDDProcessorsEnabled;
421 
422     // Requests dispatcher
423     CPSGS_Dispatcher                    m_RequestDispatcher;
424 
425     // https support
426     bool                                m_SSLEnable;
427     string                              m_SSLCertFile;
428     string                              m_SSLKeyFile;
429     string                              m_SSLCiphers;
430 
431 private:
432     static CPubseqGatewayApp *          sm_PubseqApp;
433 };
434 
435 
436 #endif
437