1 // This file is part of BOINC.
2 // http://boinc.berkeley.edu
3 // Copyright (C) 2008 University of California
4 //
5 // BOINC is free software; you can redistribute it and/or modify it
6 // under the terms of the GNU Lesser General Public License
7 // as published by the Free Software Foundation,
8 // either version 3 of the License, or (at your option) any later version.
9 //
10 // BOINC is distributed in the hope that it will be useful,
11 // but WITHOUT ANY WARRANTY; without even the implied warranty of
12 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
13 // See the GNU Lesser General Public License for more details.
14 //
15 // You should have received a copy of the GNU Lesser General Public License
16 // along with BOINC.  If not, see <http://www.gnu.org/licenses/>.
17 
18 // The "policy" part of task execution is here.
19 // The "mechanism" part is in app.C
20 //
21 
22 #include "cpp.h"
23 
24 #ifdef _WIN32
25 #include "boinc_win.h"
26 #else
27 #include "config.h"
28 #include <cassert>
29 #include <csignal>
30 #endif
31 
32 #include "error_numbers.h"
33 #include "filesys.h"
34 #include "md5_file.h"
35 #include "shmem.h"
36 #include "util.h"
37 
38 #include "client_msgs.h"
39 #include "client_state.h"
40 #include "file_names.h"
41 #include "log_flags.h"
42 #include "project.h"
43 #include "result.h"
44 
45 using std::vector;
46 
47 // Clean up after finished apps.
48 // Called every second from the main polling loop.
49 //
handle_finished_apps()50 bool CLIENT_STATE::handle_finished_apps() {
51     ACTIVE_TASK* atp;
52     bool action = false;
53     static double last_time = 0;
54     if (!clock_change && now - last_time < HANDLE_FINISHED_APPS_PERIOD) return false;
55     last_time = now;
56 
57     vector<ACTIVE_TASK*>::iterator iter;
58 
59     iter = active_tasks.active_tasks.begin();
60     while (iter != active_tasks.active_tasks.end()) {
61         atp = *iter;
62         switch (atp->task_state()) {
63         case PROCESS_EXITED:
64         case PROCESS_WAS_SIGNALED:
65         case PROCESS_EXIT_UNKNOWN:
66         case PROCESS_COULDNT_START:
67         case PROCESS_ABORTED:
68             if (log_flags.task) {
69                 msg_printf(atp->wup->project, MSG_INFO,
70                     "Computation for task %s finished", atp->result->name
71                 );
72             }
73             app_finished(*atp);
74             if (!action) {
75                 adjust_rec();     // update REC before erasing ACTIVE_TASK
76             }
77             iter = active_tasks.active_tasks.erase(iter);
78             delete atp;
79             set_client_state_dirty("handle_finished_apps");
80 
81             // the following is critical; otherwise the result is
82             // still in the "scheduled" list and enforce_schedule()
83             // will try to run it again.
84             //
85             request_schedule_cpus("handle_finished_apps");
86             action = true;
87             break;
88         default:
89             ++iter;
90         }
91     }
92     return action;
93 }
94 
95 // Handle a task that has finished.
96 // Mark its output files as present, and delete scratch files.
97 // Don't delete input files because they might be shared with other WUs.
98 // Update state of result record.
99 //
app_finished(ACTIVE_TASK & at)100 int CLIENT_STATE::app_finished(ACTIVE_TASK& at) {
101     RESULT* rp = at.result;
102     bool had_error = false;
103 
104 #ifndef SIM
105     FILE_INFO* fip;
106     unsigned int i;
107     char path[MAXPATHLEN];
108     int retval;
109     double size;
110 
111     // scan the output files, check if missing or too big.
112     // Don't bother doing this if result was aborted via GUI or by project
113     //
114     switch (rp->exit_status) {
115     case EXIT_ABORTED_VIA_GUI:
116     case EXIT_ABORTED_BY_PROJECT:
117         break;
118     default:
119         for (i=0; i<rp->output_files.size(); i++) {
120             FILE_REF& fref = rp->output_files[i];
121             fip = fref.file_info;
122             if (fip->uploaded) continue;
123             get_pathname(fip, path, sizeof(path));
124             retval = file_size(path, size);
125             if (retval) {
126                 if (fref.optional) {
127                     fip->upload_urls.clear();
128                     continue;
129                 }
130 
131                 // an output file is unexpectedly absent.
132                 //
133                 fip->status = retval;
134                 had_error = true;
135                 msg_printf(
136                     rp->project, MSG_INFO,
137                     "Output file %s for task %s absent",
138                     fip->name, rp->name
139                 );
140             } else if (size > fip->max_nbytes) {
141                 // Note: this is only checked when the application finishes.
142                 // The total disk space is checked while the application is running.
143                 //
144                 msg_printf(
145                     rp->project, MSG_INFO,
146                     "Output file %s for task %s exceeds size limit.",
147                     fip->name, rp->name
148                 );
149                 msg_printf(
150                     rp->project, MSG_INFO,
151                     "File size: %f bytes.  Limit: %f bytes",
152                     size, fip->max_nbytes
153                 );
154 
155                 fip->delete_file();
156                 fip->status = ERR_FILE_TOO_BIG;
157                 had_error = true;
158             } else {
159                 if (!fip->uploadable() && !fip->sticky) {
160                     fip->delete_file();     // sets status to NOT_PRESENT
161                 } else {
162                     retval = 0;
163                     if (fip->gzip_when_done) {
164                         retval = fip->gzip();
165                     }
166                     if (!retval) {
167                         retval = md5_file(path, fip->md5_cksum, fip->nbytes);
168                     }
169                     if (retval) {
170                         fip->status = retval;
171                         had_error = true;
172                     } else {
173                         fip->status = FILE_PRESENT;
174                     }
175                 }
176             }
177         }
178     }
179 #endif
180 
181     if (rp->exit_status != 0) {
182         had_error = true;
183     }
184 
185     if (had_error) {
186         switch (rp->exit_status) {
187         case EXIT_ABORTED_VIA_GUI:
188         case EXIT_ABORTED_BY_PROJECT:
189             rp->set_state(RESULT_ABORTED, "CS::app_finished");
190             break;
191         default:
192             rp->set_state(RESULT_COMPUTE_ERROR, "CS::app_finished");
193         }
194         rp->project->njobs_error++;
195     } else {
196 #ifdef SIM
197         rp->set_state(RESULT_FILES_UPLOADED, "CS::app_finished");
198         rp->set_ready_to_report();
199         rp->completed_time = now;
200 #else
201         rp->set_state(RESULT_FILES_UPLOADING, "CS::app_finished");
202         rp->append_log_record();
203 #endif
204         rp->project->update_duration_correction_factor(&at);
205         rp->project->njobs_success++;
206     }
207 
208     double elapsed_time = now - rec_interval_start;
209     work_fetch.accumulate_inst_sec(&at, elapsed_time);
210 
211     rp->project->pwf.request_if_idle_and_uploading = true;
212         // set this to allow work fetch if idle instance,
213         // even before upload finishes
214 
215     return 0;
216 }
217 
218 // Returns zero iff all the input files for a result are present
219 // (both WU and app version)
220 // Called from CLIENT_STATE::update_results (with verify=false)
221 // to transition result from DOWNLOADING to DOWNLOADED.
222 // Called from ACTIVE_TASK::start() (with verify=true)
223 // when project has verify_files_on_app_start set.
224 //
225 // If fipp is nonzero, return a pointer to offending FILE_INFO on error
226 //
input_files_available(RESULT * rp,bool verify_contents,FILE_INFO ** fipp)227 int CLIENT_STATE::input_files_available(
228     RESULT* rp, bool verify_contents, FILE_INFO** fipp
229 ) {
230     WORKUNIT* wup = rp->wup;
231     FILE_INFO* fip;
232     unsigned int i;
233     APP_VERSION* avp;
234     FILE_REF fr;
235     PROJECT* project = rp->project;
236     int retval;
237 
238     avp = rp->avp;
239     for (i=0; i<avp->app_files.size(); i++) {
240         fr = avp->app_files[i];
241         fip = fr.file_info;
242         if (fip->status != FILE_PRESENT) {
243             if (fipp) *fipp = fip;
244             return ERR_FILE_MISSING;
245         }
246 
247         // don't verify app files if using anonymous platform
248         //
249         if (verify_contents && !project->anonymous_platform) {
250             retval = fip->verify_file(true, true, false);
251             if (retval) {
252                 if (fipp) *fipp = fip;
253                 return retval;
254             }
255         }
256     }
257 
258     for (i=0; i<wup->input_files.size(); i++) {
259         fip = wup->input_files[i].file_info;
260         if (fip->status != FILE_PRESENT) {
261             if (wup->input_files[i].optional) continue;
262             if (fipp) *fipp = fip;
263             return ERR_FILE_MISSING;
264         }
265         if (verify_contents) {
266             retval = fip->verify_file(true, true, false);
267             if (retval) {
268                 if (fipp) *fipp = fip;
269                 return retval;
270             }
271         }
272     }
273     return 0;
274 }
275 
// Clamp f to the range [0, 1].
// NaN is mapped to 0: every ordered comparison with NaN is false,
// so "!(f >= 0)" catches both negative values and NaN,
// whereas a plain "f < 0" test would let NaN through unchanged.
//
inline double force_fraction(double f) {
    if (!(f >= 0)) return 0;
    if (f > 1) return 1;
    return f;
}
281 
get_fraction_done(RESULT * result)282 double CLIENT_STATE::get_fraction_done(RESULT* result) {
283     ACTIVE_TASK* atp = active_tasks.lookup_result(result);
284     return atp ? force_fraction(atp->fraction_done) : 0.0;
285 }
286 
287 // Find latest version of app for given platform
288 // or -1 if can't find one
289 //
latest_version(APP * app,char * platform)290 int CLIENT_STATE::latest_version(APP* app, char* platform) {
291     unsigned int i;
292     int best = -1;
293 
294     for (i=0; i<app_versions.size(); i++) {
295         APP_VERSION* avp = app_versions[i];
296         if (avp->app != app) continue;
297         if (strcmp(platform, avp->platform)) continue;
298         if (avp->version_num < best) continue;
299         best = avp->version_num;
300     }
301     return best;
302 }
303 
304 // Find the ACTIVE_TASK in the current set with the matching PID
305 //
lookup_pid(int pid)306 ACTIVE_TASK* ACTIVE_TASK_SET::lookup_pid(int pid) {
307     unsigned int i;
308     ACTIVE_TASK* atp;
309 
310     for (i=0; i<active_tasks.size(); i++) {
311         atp = active_tasks[i];
312         if (atp->pid == pid) return atp;
313     }
314     return NULL;
315 }
316 
317 // Find the ACTIVE_TASK in the current set with the matching result
318 //
lookup_result(RESULT * result)319 ACTIVE_TASK* ACTIVE_TASK_SET::lookup_result(RESULT* result) {
320     unsigned int i;
321     ACTIVE_TASK* atp;
322 
323     for (i=0; i<active_tasks.size(); i++) {
324         atp = active_tasks[i];
325         if (atp->result == result) {
326             return atp;
327         }
328     }
329     return NULL;
330 }
331