#ifndef PUBSEQ_GATEWAY__HPP
#define PUBSEQ_GATEWAY__HPP

/*  $Id: pubseq_gateway.hpp 629837 2021-04-22 12:47:49Z ivanov $
 * ===========================================================================
 *
 *                            PUBLIC DOMAIN NOTICE
 *               National Center for Biotechnology Information
 *
 *  This software/database is a "United States Government Work" under the
 *  terms of the United States Copyright Act.  It was written as part of
 *  the author's official duties as a United States Government employee and
 *  thus cannot be copyrighted.  This software/database is freely available
 *  to the public for use. The National Library of Medicine and the U.S.
 *  Government have not placed any restriction on its use or reproduction.
 *
 *  Although all reasonable efforts have been taken to ensure the accuracy
 *  and reliability of the software and data, the NLM and the U.S.
 *  Government do not and cannot warrant the performance or results that
 *  may be obtained by using this software or data. The NLM and the U.S.
 *  Government disclaim all warranties, express or implied, including
 *  warranties of performance, merchantability or fitness for any particular
 *  purpose.
 *
 *  Please cite the author in any work or product based on this material.
 *
 * ===========================================================================
 *
 * Authors: Dmitri Dmitrienko
 *
 * File Description:
 *   Declaration of the CPubseqGatewayApp application class and of the
 *   SCassMapping helper structure.
 *
 */
#include <string>

#include <corelib/ncbiapp.hpp>
#include <corelib/ncbi_system.hpp>

#include <objtools/pubseq_gateway/impl/cassandra/cass_blob_op.hpp>
#include <objtools/pubseq_gateway/impl/cassandra/cass_factory.hpp>
#include <objtools/pubseq_gateway/cache/psg_cache.hpp>
#include <objtools/pubseq_gateway/impl/cassandra/messages.hpp>

#include "pending_operation.hpp"
#include "http_server_transport.hpp"
#include "pubseq_gateway_version.hpp"
#include "pubseq_gateway_stat.hpp"
#include "pubseq_gateway_utils.hpp"
#include "pubseq_gateway_types.hpp"
#include "exclude_blob_cache.hpp"
#include "alerts.hpp"
#include "timing.hpp"
#include "psgs_dispatcher.hpp"
#include "osg_connection.hpp"


USING_NCBI_SCOPE;


// NOTE(review): presumably the upper limit, in bytes, for the test I/O
// facility (see OnTestIO() / m_IOTestBuffer in CPubseqGatewayApp) -- the use
// of the constant is not visible in this header, confirm in the
// implementation file.
const long  kMaxTestIOSize = 1000000000;


// Cassandra mapping needs to be switched atomically so it is first read into
// the corresponding structure and then switched.
65 struct SCassMapping 66 { 67 string m_BioseqKeyspace; 68 vector<pair<string, int32_t>> m_BioseqNAKeyspaces; 69 operator ==SCassMapping70 bool operator==(const SCassMapping & rhs) const 71 { 72 return m_BioseqKeyspace == rhs.m_BioseqKeyspace && 73 m_BioseqNAKeyspaces == rhs.m_BioseqNAKeyspaces; 74 } 75 operator !=SCassMapping76 bool operator!=(const SCassMapping & rhs) const 77 { return !this->operator==(rhs); } 78 validateSCassMapping79 vector<string> validate(const string & root_keyspace) const 80 { 81 vector<string> errors; 82 83 if (m_BioseqKeyspace.empty()) 84 errors.push_back("Cannot find the resolver keyspace (where SI2CSI " 85 "and BIOSEQ_INFO tables reside) in the " + 86 root_keyspace + ".SAT2KEYSPACE table."); 87 88 if (m_BioseqNAKeyspaces.empty()) 89 errors.push_back("No bioseq named annotation keyspaces found " 90 "in the " + root_keyspace + " keyspace."); 91 92 return errors; 93 } 94 clearSCassMapping95 void clear(void) 96 { 97 m_BioseqKeyspace.clear(); 98 m_BioseqNAKeyspaces.clear(); 99 } 100 }; 101 102 103 class CPubseqGatewayApp: public CNcbiApplication 104 { 105 public: 106 CPubseqGatewayApp(); 107 ~CPubseqGatewayApp(); 108 109 virtual void Init(void); 110 void ParseArgs(void); 111 void OpenCache(void); 112 bool OpenCass(void); 113 bool PopulateCassandraMapping(bool need_accept_alert=false); 114 void PopulatePublicCommentsMapping(void); 115 void CheckCassMapping(void); 116 void CloseCass(void); 117 bool SatToKeyspace(int sat, string & sat_name); 118 GetBioseqKeyspace(void) const119 string GetBioseqKeyspace(void) const 120 { 121 return m_CassMapping[m_MappingIndex].m_BioseqKeyspace; 122 } 123 GetBioseqNAKeyspaces(void) const124 vector<pair<string, int32_t>> GetBioseqNAKeyspaces(void) const 125 { 126 return m_CassMapping[m_MappingIndex].m_BioseqNAKeyspaces; 127 } 128 GetLookupCache(void)129 CPubseqGatewayCache * GetLookupCache(void) 130 { 131 return m_LookupCache.get(); 132 } 133 GetExcludeBlobCache(void)134 CExcludeBlobCache * 
GetExcludeBlobCache(void) 135 { 136 return m_ExcludeBlobCache.get(); 137 } 138 GetPublicCommentsMapping(void)139 CPSGMessages * GetPublicCommentsMapping(void) 140 { 141 return m_PublicComments.get(); 142 } 143 GetSlimMaxBlobSize(void) const144 unsigned long GetSlimMaxBlobSize(void) const 145 { 146 return m_SlimMaxBlobSize; 147 } 148 149 int OnBadURL(CHttpRequest & req, shared_ptr<CPSGS_Reply> reply); 150 int OnGet(CHttpRequest & req, shared_ptr<CPSGS_Reply> reply); 151 int OnGetBlob(CHttpRequest & req, shared_ptr<CPSGS_Reply> reply); 152 int OnResolve(CHttpRequest & req, shared_ptr<CPSGS_Reply> reply); 153 int OnGetTSEChunk(CHttpRequest & req, shared_ptr<CPSGS_Reply> reply); 154 int OnGetNA(CHttpRequest & req, shared_ptr<CPSGS_Reply> reply); 155 int OnHealth(CHttpRequest & req, shared_ptr<CPSGS_Reply> reply); 156 int OnConfig(CHttpRequest & req, shared_ptr<CPSGS_Reply> reply); 157 int OnInfo(CHttpRequest & req, shared_ptr<CPSGS_Reply> reply); 158 int OnStatus(CHttpRequest & req, shared_ptr<CPSGS_Reply> reply); 159 int OnShutdown(CHttpRequest & req, shared_ptr<CPSGS_Reply> reply); 160 int OnGetAlerts(CHttpRequest & req, shared_ptr<CPSGS_Reply> reply); 161 int OnAckAlert(CHttpRequest & req, shared_ptr<CPSGS_Reply> reply); 162 int OnStatistics(CHttpRequest & req, shared_ptr<CPSGS_Reply> reply); 163 int OnTestIO(CHttpRequest & req, shared_ptr<CPSGS_Reply> reply); 164 165 virtual int Run(void); 166 167 static CPubseqGatewayApp * GetInstance(void); 168 GetTiming(void)169 COperationTiming & GetTiming(void) 170 { 171 return *m_Timing.get(); 172 } 173 GetStartupDataState(void) const174 EPSGS_StartupDataState GetStartupDataState(void) const 175 { 176 return m_StartupDataState; 177 } 178 GetIdToNameAndDescriptionMap(void) const179 map<string, tuple<string, string>> GetIdToNameAndDescriptionMap(void) const 180 { 181 return m_IdToNameAndDescription; 182 } 183 GetCassandraConnection(void)184 shared_ptr<CCassConnection> GetCassandraConnection(void) 185 { 186 return 
m_CassConnection; 187 } 188 GetCassandraTimeout(void) const189 unsigned int GetCassandraTimeout(void) const 190 { 191 return m_TimeoutMs; 192 } 193 GetCassandraMaxRetries(void) const194 unsigned int GetCassandraMaxRetries(void) const 195 { 196 return m_MaxRetries; 197 } 198 GetOSGProcessorsEnabled() const199 bool GetOSGProcessorsEnabled() const 200 { 201 return m_OSGProcessorsEnabled; 202 } 203 GetCDDProcessorsEnabled() const204 bool GetCDDProcessorsEnabled() const 205 { 206 return m_CDDProcessorsEnabled; 207 } 208 GetOSGConnectionPool() const209 const CRef<psg::osg::COSGConnectionPool>& GetOSGConnectionPool() const 210 { 211 return m_OSGConnectionPool; 212 } 213 GetCassandraProcessorsEnabled(void) const214 bool GetCassandraProcessorsEnabled(void) const 215 { 216 return m_CassandraProcessorsEnabled; 217 } 218 219 IPSGS_Processor::EPSGS_StartProcessing SignalStartProcessing(IPSGS_Processor * processor)220 SignalStartProcessing(IPSGS_Processor * processor) 221 { 222 return m_RequestDispatcher.SignalStartProcessing(processor); 223 } 224 SignalFinishProcessing(IPSGS_Processor * processor)225 void SignalFinishProcessing(IPSGS_Processor * processor) 226 { 227 m_RequestDispatcher.SignalFinishProcessing(processor); 228 } 229 SignalConnectionCancelled(IPSGS_Processor * processor)230 void SignalConnectionCancelled(IPSGS_Processor * processor) 231 { 232 m_RequestDispatcher.SignalConnectionCancelled(processor); 233 } 234 GetSSLEnable(void) const235 bool GetSSLEnable(void) const 236 { 237 return m_SSLEnable; 238 } 239 GetSSLCertFile(void) const240 string GetSSLCertFile(void) const 241 { 242 return m_SSLCertFile; 243 } 244 GetSSLKeyFile(void) const245 string GetSSLKeyFile(void) const 246 { 247 return m_SSLKeyFile; 248 } 249 GetSSLCiphers(void) const250 string GetSSLCiphers(void) const 251 { 252 return m_SSLCiphers; 253 } 254 GetAlerts(void)255 CPSGAlerts & GetAlerts(void) 256 { 257 return m_Alerts; 258 } 259 GetCounters(void)260 CPSGSCounters & GetCounters(void) 261 { 262 return 
m_Counters; 263 } 264 265 private: 266 struct SRequestParameter 267 { 268 bool m_Found; 269 CTempString m_Value; 270 SRequestParameterCPubseqGatewayApp::SRequestParameter271 SRequestParameter() : m_Found(false) 272 {} 273 }; 274 275 void x_SendMessageAndCompletionChunks( 276 shared_ptr<CPSGS_Reply> reply, 277 const string & message, 278 CRequestStatus::ECode status, int code, EDiagSev severity); 279 280 bool x_ProcessCommonGetAndResolveParams( 281 CHttpRequest & req, 282 shared_ptr<CPSGS_Reply> reply, 283 CTempString & seq_id, int & seq_id_type, 284 SPSGS_RequestBase::EPSGS_CacheAndDbUse & use_cache); 285 286 private: 287 void x_ValidateArgs(void); 288 string x_GetCmdLineArguments(void) const; 289 CRef<CRequestContext> x_CreateRequestContext(CHttpRequest & req) const; 290 void x_PrintRequestStop(CRef<CRequestContext> context, int status); 291 292 SRequestParameter x_GetParam(CHttpRequest & req, 293 const string & name) const; 294 SPSGS_RequestBase::EPSGS_CacheAndDbUse x_GetUseCacheParameter( 295 CHttpRequest & req, 296 string & err_msg); 297 bool x_IsBoolParamValid(const string & param_name, 298 const CTempString & param_value, 299 string & err_msg) const; 300 bool x_ConvertIntParameter(const string & param_name, 301 const CTempString & param_value, 302 int & converted, 303 string & err_msg) const; 304 bool x_IsResolutionParamValid(const string & param_name, 305 const CTempString & param_value, 306 string & err_msg) const; 307 SPSGS_ResolveRequest::EPSGS_OutputFormat x_GetOutputFormat( 308 const string & param_name, 309 const CTempString & param_value, 310 string & err_msg) const; 311 SPSGS_BlobRequestBase::EPSGS_TSEOption x_GetTSEOption( 312 const string & param_name, 313 const CTempString & param_value, 314 string & err_msg) const; 315 vector<string> x_GetExcludeBlobs(const string & param_name, 316 const CTempString & param_value) const; 317 unsigned long x_GetDataSize(const IRegistry & reg, 318 const string & section, 319 const string & entry, 320 unsigned long 
default_val); 321 SPSGS_RequestBase::EPSGS_AccSubstitutioOption x_GetAccessionSubstitutionOption( 322 const string & param_name, 323 const CTempString & param_value, 324 string & err_msg) const; 325 bool x_GetTraceParameter(CHttpRequest & req, 326 const string & param_name, 327 SPSGS_RequestBase::EPSGS_Trace & trace, 328 string & err_msg); 329 bool x_GetHops(CHttpRequest & req, 330 shared_ptr<CPSGS_Reply> reply, int & hops); 331 bool x_GetEnabledAndDisabledProcessors(CHttpRequest & req, 332 shared_ptr<CPSGS_Reply> reply, 333 vector<string> & enabled_processors, 334 vector<string> & disabled_processors); 335 336 private: 337 void x_InsufficientArguments(shared_ptr<CPSGS_Reply> reply, 338 CRef<CRequestContext> & context, 339 const string & err_msg); 340 void x_MalformedArguments(shared_ptr<CPSGS_Reply> reply, 341 CRef<CRequestContext> & context, 342 const string & err_msg); 343 bool x_IsShuttingDown(shared_ptr<CPSGS_Reply> reply); 344 void x_ReadIdToNameAndDescriptionConfiguration(const IRegistry & reg, 345 const string & section); 346 void x_RegisterProcessors(void); 347 void x_DispatchRequest(shared_ptr<CPSGS_Request> request, 348 shared_ptr<CPSGS_Reply> reply); 349 void x_InitSSL(void); 350 351 private: 352 string m_Si2csiDbFile; 353 string m_BioseqInfoDbFile; 354 string m_BlobPropDbFile; 355 356 // Bioseq and named annotations keyspaces can be updated dynamically. 357 // The index controls the active set. 358 // The sat names cannot be updated dynamically - they are read once. 
359 size_t m_MappingIndex; 360 SCassMapping m_CassMapping[2]; 361 vector<string> m_SatNames; 362 unique_ptr<CPSGMessages> m_PublicComments; 363 364 unsigned short m_HttpPort; 365 unsigned short m_HttpWorkers; 366 unsigned int m_ListenerBacklog; 367 unsigned short m_TcpMaxConn; 368 369 shared_ptr<CCassConnection> m_CassConnection; 370 shared_ptr<CCassConnectionFactory> m_CassConnectionFactory; 371 unsigned int m_TimeoutMs; 372 unsigned int m_MaxRetries; 373 374 unsigned int m_ExcludeCacheMaxSize; 375 unsigned int m_ExcludeCachePurgePercentage; 376 unsigned int m_ExcludeCacheInactivityPurge; 377 unsigned long m_SmallBlobSize; 378 unsigned long m_MinStatValue; 379 unsigned long m_MaxStatValue; 380 unsigned long m_NStatBins; 381 string m_StatScaleType; 382 unsigned long m_TickSpan; 383 384 CTime m_StartTime; 385 string m_RootKeyspace; 386 string m_AuthToken; 387 388 bool m_AllowIOTest; 389 unique_ptr<char []> m_IOTestBuffer; 390 391 unsigned long m_SlimMaxBlobSize; 392 int m_MaxHops; 393 394 bool m_CassandraProcessorsEnabled; 395 string m_TestSeqId; 396 bool m_TestSeqIdIgnoreError; 397 398 unique_ptr<CPubseqGatewayCache> m_LookupCache; 399 unique_ptr<CHttpDaemon<CPendingOperation>> 400 m_TcpDaemon; 401 402 unique_ptr<CExcludeBlobCache> m_ExcludeBlobCache; 403 404 CPSGAlerts m_Alerts; 405 unique_ptr<COperationTiming> m_Timing; 406 CPSGSCounters m_Counters; 407 408 EPSGS_StartupDataState m_StartupDataState; 409 CNcbiLogFields m_LogFields; 410 411 // Serialized JSON introspection message 412 string m_HelpMessage; 413 414 // Configured counter/statistics ID to name/description 415 map<string, tuple<string, string>> m_IdToNameAndDescription; 416 417 bool m_OSGProcessorsEnabled; 418 CRef<psg::osg::COSGConnectionPool> m_OSGConnectionPool; 419 420 bool m_CDDProcessorsEnabled; 421 422 // Requests dispatcher 423 CPSGS_Dispatcher m_RequestDispatcher; 424 425 // https support 426 bool m_SSLEnable; 427 string m_SSLCertFile; 428 string m_SSLKeyFile; 429 string m_SSLCiphers; 430 431 
private: 432 static CPubseqGatewayApp * sm_PubseqApp; 433 }; 434 435 436 #endif 437