1 // This file is part of BOINC.
2 // http://boinc.berkeley.edu
3 // Copyright (C) 2008 University of California
4 //
5 // BOINC is free software; you can redistribute it and/or modify it
6 // under the terms of the GNU Lesser General Public License
7 // as published by the Free Software Foundation,
8 // either version 3 of the License, or (at your option) any later version.
9 //
10 // BOINC is distributed in the hope that it will be useful,
11 // but WITHOUT ANY WARRANTY; without even the implied warranty of
12 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
13 // See the GNU Lesser General Public License for more details.
14 //
15 // You should have received a copy of the GNU Lesser General Public License
16 // along with BOINC. If not, see <http://www.gnu.org/licenses/>.
17
18 // The "policy" part of task execution is here.
19 // The "mechanism" part is in app.C
20 //
21
22 #include "cpp.h"
23
24 #ifdef _WIN32
25 #include "boinc_win.h"
26 #else
27 #include "config.h"
28 #include <cassert>
29 #include <csignal>
30 #endif
31
32 #include "error_numbers.h"
33 #include "filesys.h"
34 #include "md5_file.h"
35 #include "shmem.h"
36 #include "util.h"
37
38 #include "client_msgs.h"
39 #include "client_state.h"
40 #include "file_names.h"
41 #include "log_flags.h"
42 #include "project.h"
43 #include "result.h"
44
45 using std::vector;
46
47 // Clean up after finished apps.
48 // Called every second from the main polling loop.
49 //
handle_finished_apps()50 bool CLIENT_STATE::handle_finished_apps() {
51 ACTIVE_TASK* atp;
52 bool action = false;
53 static double last_time = 0;
54 if (!clock_change && now - last_time < HANDLE_FINISHED_APPS_PERIOD) return false;
55 last_time = now;
56
57 vector<ACTIVE_TASK*>::iterator iter;
58
59 iter = active_tasks.active_tasks.begin();
60 while (iter != active_tasks.active_tasks.end()) {
61 atp = *iter;
62 switch (atp->task_state()) {
63 case PROCESS_EXITED:
64 case PROCESS_WAS_SIGNALED:
65 case PROCESS_EXIT_UNKNOWN:
66 case PROCESS_COULDNT_START:
67 case PROCESS_ABORTED:
68 if (log_flags.task) {
69 msg_printf(atp->wup->project, MSG_INFO,
70 "Computation for task %s finished", atp->result->name
71 );
72 }
73 app_finished(*atp);
74 if (!action) {
75 adjust_rec(); // update REC before erasing ACTIVE_TASK
76 }
77 iter = active_tasks.active_tasks.erase(iter);
78 delete atp;
79 set_client_state_dirty("handle_finished_apps");
80
81 // the following is critical; otherwise the result is
82 // still in the "scheduled" list and enforce_schedule()
83 // will try to run it again.
84 //
85 request_schedule_cpus("handle_finished_apps");
86 action = true;
87 break;
88 default:
89 ++iter;
90 }
91 }
92 return action;
93 }
94
95 // Handle a task that has finished.
96 // Mark its output files as present, and delete scratch files.
97 // Don't delete input files because they might be shared with other WUs.
98 // Update state of result record.
99 //
app_finished(ACTIVE_TASK & at)100 int CLIENT_STATE::app_finished(ACTIVE_TASK& at) {
101 RESULT* rp = at.result;
102 bool had_error = false;
103
104 #ifndef SIM
105 FILE_INFO* fip;
106 unsigned int i;
107 char path[MAXPATHLEN];
108 int retval;
109 double size;
110
111 // scan the output files, check if missing or too big.
112 // Don't bother doing this if result was aborted via GUI or by project
113 //
114 switch (rp->exit_status) {
115 case EXIT_ABORTED_VIA_GUI:
116 case EXIT_ABORTED_BY_PROJECT:
117 break;
118 default:
119 for (i=0; i<rp->output_files.size(); i++) {
120 FILE_REF& fref = rp->output_files[i];
121 fip = fref.file_info;
122 if (fip->uploaded) continue;
123 get_pathname(fip, path, sizeof(path));
124 retval = file_size(path, size);
125 if (retval) {
126 if (fref.optional) {
127 fip->upload_urls.clear();
128 continue;
129 }
130
131 // an output file is unexpectedly absent.
132 //
133 fip->status = retval;
134 had_error = true;
135 msg_printf(
136 rp->project, MSG_INFO,
137 "Output file %s for task %s absent",
138 fip->name, rp->name
139 );
140 } else if (size > fip->max_nbytes) {
141 // Note: this is only checked when the application finishes.
142 // The total disk space is checked while the application is running.
143 //
144 msg_printf(
145 rp->project, MSG_INFO,
146 "Output file %s for task %s exceeds size limit.",
147 fip->name, rp->name
148 );
149 msg_printf(
150 rp->project, MSG_INFO,
151 "File size: %f bytes. Limit: %f bytes",
152 size, fip->max_nbytes
153 );
154
155 fip->delete_file();
156 fip->status = ERR_FILE_TOO_BIG;
157 had_error = true;
158 } else {
159 if (!fip->uploadable() && !fip->sticky) {
160 fip->delete_file(); // sets status to NOT_PRESENT
161 } else {
162 retval = 0;
163 if (fip->gzip_when_done) {
164 retval = fip->gzip();
165 }
166 if (!retval) {
167 retval = md5_file(path, fip->md5_cksum, fip->nbytes);
168 }
169 if (retval) {
170 fip->status = retval;
171 had_error = true;
172 } else {
173 fip->status = FILE_PRESENT;
174 }
175 }
176 }
177 }
178 }
179 #endif
180
181 if (rp->exit_status != 0) {
182 had_error = true;
183 }
184
185 if (had_error) {
186 switch (rp->exit_status) {
187 case EXIT_ABORTED_VIA_GUI:
188 case EXIT_ABORTED_BY_PROJECT:
189 rp->set_state(RESULT_ABORTED, "CS::app_finished");
190 break;
191 default:
192 rp->set_state(RESULT_COMPUTE_ERROR, "CS::app_finished");
193 }
194 rp->project->njobs_error++;
195 } else {
196 #ifdef SIM
197 rp->set_state(RESULT_FILES_UPLOADED, "CS::app_finished");
198 rp->set_ready_to_report();
199 rp->completed_time = now;
200 #else
201 rp->set_state(RESULT_FILES_UPLOADING, "CS::app_finished");
202 rp->append_log_record();
203 #endif
204 rp->project->update_duration_correction_factor(&at);
205 rp->project->njobs_success++;
206 }
207
208 double elapsed_time = now - rec_interval_start;
209 work_fetch.accumulate_inst_sec(&at, elapsed_time);
210
211 rp->project->pwf.request_if_idle_and_uploading = true;
212 // set this to allow work fetch if idle instance,
213 // even before upload finishes
214
215 return 0;
216 }
217
218 // Returns zero iff all the input files for a result are present
219 // (both WU and app version)
220 // Called from CLIENT_STATE::update_results (with verify=false)
221 // to transition result from DOWNLOADING to DOWNLOADED.
222 // Called from ACTIVE_TASK::start() (with verify=true)
223 // when project has verify_files_on_app_start set.
224 //
225 // If fipp is nonzero, return a pointer to offending FILE_INFO on error
226 //
input_files_available(RESULT * rp,bool verify_contents,FILE_INFO ** fipp)227 int CLIENT_STATE::input_files_available(
228 RESULT* rp, bool verify_contents, FILE_INFO** fipp
229 ) {
230 WORKUNIT* wup = rp->wup;
231 FILE_INFO* fip;
232 unsigned int i;
233 APP_VERSION* avp;
234 FILE_REF fr;
235 PROJECT* project = rp->project;
236 int retval;
237
238 avp = rp->avp;
239 for (i=0; i<avp->app_files.size(); i++) {
240 fr = avp->app_files[i];
241 fip = fr.file_info;
242 if (fip->status != FILE_PRESENT) {
243 if (fipp) *fipp = fip;
244 return ERR_FILE_MISSING;
245 }
246
247 // don't verify app files if using anonymous platform
248 //
249 if (verify_contents && !project->anonymous_platform) {
250 retval = fip->verify_file(true, true, false);
251 if (retval) {
252 if (fipp) *fipp = fip;
253 return retval;
254 }
255 }
256 }
257
258 for (i=0; i<wup->input_files.size(); i++) {
259 fip = wup->input_files[i].file_info;
260 if (fip->status != FILE_PRESENT) {
261 if (wup->input_files[i].optional) continue;
262 if (fipp) *fipp = fip;
263 return ERR_FILE_MISSING;
264 }
265 if (verify_contents) {
266 retval = fip->verify_file(true, true, false);
267 if (retval) {
268 if (fipp) *fipp = fip;
269 return retval;
270 }
271 }
272 }
273 return 0;
274 }
275
// Clamp f to the closed interval [0, 1].
// Values below 0 (including -infinity) map to 0,
// values above 1 (including +infinity) map to 1.
// NaN maps to 0: every ordered comparison with NaN is false,
// so a plain "f < 0" / "f > 1" pair would let it through unchanged.
//
inline double force_fraction(double f) {
    // "!(f > 0)" is true for negatives, zero and NaN
    if (!(f > 0)) return 0;
    if (f > 1) return 1;
    return f;
}
281
get_fraction_done(RESULT * result)282 double CLIENT_STATE::get_fraction_done(RESULT* result) {
283 ACTIVE_TASK* atp = active_tasks.lookup_result(result);
284 return atp ? force_fraction(atp->fraction_done) : 0.0;
285 }
286
287 // Find latest version of app for given platform
288 // or -1 if can't find one
289 //
latest_version(APP * app,char * platform)290 int CLIENT_STATE::latest_version(APP* app, char* platform) {
291 unsigned int i;
292 int best = -1;
293
294 for (i=0; i<app_versions.size(); i++) {
295 APP_VERSION* avp = app_versions[i];
296 if (avp->app != app) continue;
297 if (strcmp(platform, avp->platform)) continue;
298 if (avp->version_num < best) continue;
299 best = avp->version_num;
300 }
301 return best;
302 }
303
304 // Find the ACTIVE_TASK in the current set with the matching PID
305 //
lookup_pid(int pid)306 ACTIVE_TASK* ACTIVE_TASK_SET::lookup_pid(int pid) {
307 unsigned int i;
308 ACTIVE_TASK* atp;
309
310 for (i=0; i<active_tasks.size(); i++) {
311 atp = active_tasks[i];
312 if (atp->pid == pid) return atp;
313 }
314 return NULL;
315 }
316
317 // Find the ACTIVE_TASK in the current set with the matching result
318 //
lookup_result(RESULT * result)319 ACTIVE_TASK* ACTIVE_TASK_SET::lookup_result(RESULT* result) {
320 unsigned int i;
321 ACTIVE_TASK* atp;
322
323 for (i=0; i<active_tasks.size(); i++) {
324 atp = active_tasks[i];
325 if (atp->result == result) {
326 return atp;
327 }
328 }
329 return NULL;
330 }
331