1 /*  $Id: remote_cgi_wn.cpp 617780 2020-10-06 16:24:16Z gouriano $
2  * ===========================================================================
3  *
4  *                            PUBLIC DOMAIN NOTICE
5  *               National Center for Biotechnology Information
6  *
7  *  This software/database is a "United States Government Work" under the
8  *  terms of the United States Copyright Act.  It was written as part of
9  *  the author's official duties as a United States Government employee and
10  *  thus cannot be copyrighted.  This software/database is freely available
11  *  to the public for use. The National Library of Medicine and the U.S.
12  *  Government have not placed any restriction on its use or reproduction.
13  *
14  *  Although all reasonable efforts have been taken to ensure the accuracy
15  *  and reliability of the software and data, the NLM and the U.S.
16  *  Government do not and cannot warrant the performance or results that
17  *  may be obtained by using this software or data. The NLM and the U.S.
18  *  Government disclaim all warranties, express or implied, including
19  *  warranties of performance, merchantability or fitness for any particular
20  *  purpose.
21  *
22  *  Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Authors:  Maxim Didenko, Dmitry Kazimirov
27  *
28  * File Description:  NetSchedule worker node sample
29  *
30  */
31 
32 #include <ncbi_pch.hpp>
33 
34 #include "exec_helpers.hpp"
35 
36 #include <cgi/ncbicgi.hpp>
37 
38 #include <functional>
39 #include <iterator>
40 
41 USING_NCBI_SCOPE;
42 
/// Predicate over environment variable names: tells whether a name is one
/// of the CGI variables this worker node treats as standard.
struct IsStandard
{
    /// Argument type published for standard functor adaptors.
    using argument_type = string;

    /// Null-terminated table of the recognized variable names.
    static const char* const sm_StandardCgiEnv[];

    /// Check a single variable name (defined out of line below).
    bool operator() (const string& val) const;
};
49 
50 const char* const IsStandard::sm_StandardCgiEnv[] = {
51     "DOCUMENT_ROOT",
52     "GATEWAY_INTERFACE",
53     "PROXIED_IP",
54     "QUERY_STRING",
55     "REMOTE_ADDR",
56     "REMOTE_PORT",
57     "REMOTE_IDENT",
58     "REQUEST_METHOD",
59     "REQUEST_URI",
60     "SCRIPT_FILENAME",
61     "SCRIPT_NAME",
62     "SCRIPT_URI",
63     "SCRIPT_URL",
64     "SERVER_ADDR",
65     "SERVER_ADMIN",
66     "SERVER_NAME",
67     "SERVER_PORT",
68     "SERVER_PROTOCOL",
69     "SERVER_SIGNATURE",
70     "SERVER_SOFTWARE",
71     "SERVER_METHOD",
72     "PATH_INFO",
73     "PATH_TRANSLATED",
74     "AUTH_TYPE",
75     "CONTENT_TYPE",
76     "CONTENT_LENGTH",
77     "UNIQUE_ID",
78     NULL
79 };
80 
operator ()(const string & val) const81 inline bool IsStandard::operator() (const string& val) const
82 {
83     if (NStr::StartsWith(val, "HTTP_")) return true;
84     for( int i = 0; sm_StandardCgiEnv[i] != NULL; ++i) {
85         if (NStr::Compare(val, sm_StandardCgiEnv[i]) == 0) return true;
86     }
87     return false;
88 }
89 
/// Predicate bound to a container: true if the given string compares equal
/// to one of the container's elements.
///
/// Only a reference to the container is stored, so the container has to
/// stay alive for as long as the predicate is used.
template <typename Cont>
struct HasValue
{
    /// Argument type published for standard functor adaptors.
    using argument_type = string;

    HasValue(const Cont& cont) : m_Cont(cont) {}

    inline bool operator() (const string& val) const
    {
        for (const auto& element : m_Cont) {
            if (element == val) {
                return true;
            }
        }
        return false;
    }

    const Cont& m_Cont;
};
101 
102 class CCgiEnvHolder
103 {
104 public:
105     CCgiEnvHolder(const CRemoteAppLauncher& remote_app_launcher,
106         const CNcbiEnvironment& client_env,
107         const CNetScheduleJob& job,
108         const string& service_name,
109         const string& queue_name);
110 
GetEnv() const111     const char* const* GetEnv() const { return &m_Env[0]; }
112 
113 private:
114     list<string> m_EnvValues;
115     vector<const char*> m_Env;
116 };
117 
CCgiEnvHolder(const CRemoteAppLauncher & remote_app_launcher,const CNcbiEnvironment & client_env,const CNetScheduleJob & job,const string & service_name,const string & queue_name)118 CCgiEnvHolder::CCgiEnvHolder(const CRemoteAppLauncher& remote_app_launcher,
119     const CNcbiEnvironment& client_env,
120     const CNetScheduleJob& job,
121     const string& service_name,
122     const string& queue_name)
123 {
124     list<string> cln_names;
125     client_env.Enumerate(cln_names);
126     list<string> names(cln_names.begin(), cln_names.end());
127 
128     names.erase(remove_if(names.begin(), names.end(),
129             not1(IsStandard())), names.end());
130 
131     names.erase(remove_if(names.begin(), names.end(),
132             HasValue<list<string> >(remote_app_launcher.GetExcludedEnv())),
133             names.end());
134 
135     list<string> inc_names(cln_names.begin(), cln_names.end());
136 
137     inc_names.erase(remove_if(inc_names.begin(),inc_names.end(),
138         not1(HasValue<list<string> >(remote_app_launcher.GetIncludedEnv()))),
139             inc_names.end());
140 
141     names.insert(names.begin(),inc_names.begin(), inc_names.end());
142     const CRemoteAppLauncher::TEnvMap& added_env =
143             remote_app_launcher.GetAddedEnv();
144     ITERATE(CRemoteAppLauncher::TEnvMap, it, added_env) {
145         m_EnvValues.push_back(it->first + "=" +it->second);
146     }
147 
148     ITERATE(list<string>, it, names) {
149         if (added_env.find(*it) == added_env.end())
150             m_EnvValues.push_back(*it + "=" + client_env.Get(*it));
151     }
152 
153     list<string> local_names;
154     const CNcbiEnvironment& local_env = remote_app_launcher.GetLocalEnv();
155     local_env.Enumerate(local_names);
156     ITERATE(list<string>, it, local_names) {
157         const string& s = *it;
158         if (added_env.find(s) == added_env.end()
159             && find(names.begin(), names.end(), s) == names.end())
160             m_EnvValues.push_back(s + "=" + local_env.Get(s));
161     }
162 
163     m_EnvValues.push_back("NCBI_NS_SERVICE=" + service_name);
164     m_EnvValues.push_back("NCBI_NS_QUEUE=" + queue_name);
165     m_EnvValues.push_back("NCBI_NS_JID=" + job.job_id);
166     m_EnvValues.push_back("NCBI_JOB_AFFINITY=" + job.affinity);
167 
168     if (!job.client_ip.empty())
169         m_EnvValues.push_back("NCBI_LOG_CLIENT_IP=" + job.client_ip);
170 
171     if (!job.session_id.empty())
172         m_EnvValues.push_back("NCBI_LOG_SESSION_ID=" + job.session_id);
173 
174     if (!job.page_hit_id.empty())
175         m_EnvValues.push_back("NCBI_LOG_HIT_ID=" + job.page_hit_id);
176 
177     ITERATE(list<string>, it, m_EnvValues) {
178         m_Env.push_back(it->c_str());
179     }
180     m_Env.push_back(NULL);
181 }
182 
183 
184 ///////////////////////////////////////////////////////////////////////
185 
186 class CRemoteCgiJob : public IWorkerNodeJob
187 {
188 public:
189     CRemoteCgiJob(const IWorkerNodeInitContext& context,
190             const CRemoteAppLauncher& remote_app_launcher);
191 
~CRemoteCgiJob()192     virtual ~CRemoteCgiJob() {}
193 
194     int Do(CWorkerNodeJobContext& context);
195 
196 private:
197     const CRemoteAppLauncher& m_RemoteAppLauncher;
198 };
199 
Do(CWorkerNodeJobContext & context)200 int CRemoteCgiJob::Do(CWorkerNodeJobContext& context)
201 {
202     if (context.IsLogRequested()) {
203         LOG_POST(Note << "Job " << context.GetJobKey() + " input: " +
204             context.GetJobInput());
205     }
206 
207     unique_ptr<CCgiRequest> request;
208 
209     try {
210         request.reset(new CCgiRequest(context.GetIStream(),
211             CCgiRequest::fIgnoreQueryString |
212             CCgiRequest::fDoNotParseContent));
213     }
214     catch (exception&) {
215         ERR_POST("Cannot deserialize remote_cgi job");
216         context.CommitJobWithFailure(
217             "Error while parsing CGI request stream");
218         return -1;
219     }
220 
221     CCgiEnvHolder env(m_RemoteAppLauncher,
222             request->GetEnvironment(),
223             context.GetJob(),
224             context.GetWorkerNode().GetServiceName(),
225             context.GetQueueName());
226     vector<string> args;
227 
228     CNcbiOstrstream err;
229     CNcbiStrstream str_in;
230     CNcbiIstream* in = request->GetInputStream();
231     if (!in)
232         in = &str_in;
233 
234     int ret = -1;
235     bool finished_ok = m_RemoteAppLauncher.ExecRemoteApp(args,
236                                         *in,
237                                         context.GetOStream(),
238                                         err,
239                                         ret,
240                                         context,
241                                         0,
242                                         env.GetEnv());
243 
244     m_RemoteAppLauncher.FinishJob(finished_ok, ret, context);
245 
246     if (context.IsLogRequested()) {
247         if ( !IsOssEmpty(err) )
248             LOG_POST(Note << "STDERR: " << (string)CNcbiOstrstreamToString(err));
249 
250         LOG_POST(Note << "Job " << context.GetJobKey() <<
251             " is " << context.GetCommitStatusDescription(
252                     context.GetCommitStatus()) <<
253             ". Exit code: " << ret <<
254             "; output: " << context.GetJobOutput());
255     }
256 
257     return ret;
258 }
259 
CRemoteCgiJob(const IWorkerNodeInitContext &,const CRemoteAppLauncher & remote_app_launcher)260 CRemoteCgiJob::CRemoteCgiJob(const IWorkerNodeInitContext&,
261         const CRemoteAppLauncher& remote_app_launcher) :
262     m_RemoteAppLauncher(remote_app_launcher)
263 {
264     CGridGlobals::GetInstance().SetReuseJobObject(true);
265 }
266 
267 #define GRID_APP_NAME "remote_cgi"
268 extern const char kGridAppName[] = GRID_APP_NAME;
269 
270 using TRemoteAppJobFactory = CRemoteAppJobFactory<CRemoteCgiJob, CRemoteAppBaseListener, kGridAppName>;
271 
main(int argc,const char * argv[])272 int main(int argc, const char* argv[])
273 {
274     GRID_APP_CHECK_VERSION_ARGS();
275     return Main<TRemoteAppJobFactory, CRemoteAppBaseListener>(argc, argv);
276 }
277