1 // This file is part of BOINC.
2 // http://boinc.berkeley.edu
3 // Copyright (C) 2008 University of California
4 //
5 // BOINC is free software; you can redistribute it and/or modify it
6 // under the terms of the GNU Lesser General Public License
7 // as published by the Free Software Foundation,
8 // either version 3 of the License, or (at your option) any later version.
9 //
10 // BOINC is distributed in the hope that it will be useful,
11 // but WITHOUT ANY WARRANTY; without even the implied warranty of
12 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
13 // See the GNU Lesser General Public License for more details.
14 //
15 // You should have received a copy of the GNU Lesser General Public License
16 // along with BOINC.  If not, see <http://www.gnu.org/licenses/>.
17 
18 #ifndef BOINC_SCHED_TYPES_H
19 #define BOINC_SCHED_TYPES_H
20 
21 #include <cstdio>
22 #include <vector>
23 
24 #include "boinc_db.h"
25 #include "common_defs.h"
26 #include "md5_file.h"
27 #include "coproc.h"
28 
29 #include "edf_sim.h"
30 
// for projects that support work filtering by app,
// this records an app for which the user will accept work
//
struct APP_INFO {
    int appid;
        // presumably the DB ID of the app -- TODO confirm
    int work_available;
        // NOTE(review): looks like a flag or count saying whether work
        // exists for this app; confirm against the code that sets it
};
38 
// tracks a resource (disk etc.) that the client may be short of
//
struct RESOURCE {
    bool insufficient;  // set once a shortfall has been recorded
    double needed;      // the min extra amount needed

    // Record a shortfall of x.
    // "needed" keeps the smallest amount recorded so far;
    // a value of zero is treated as "nothing recorded yet".
    // Nothing here initializes the fields: callers must zero them first.
    //
    inline void set_insufficient(double x) {
        insufficient = true;
        const bool nothing_recorded = !needed;
        if (nothing_recorded || x < needed) {
            needed = x;
        }
    }
};
54 
// a message for the volunteer
//
struct USER_MESSAGE {
    std::string message;
        // the message text
    std::string priority;
        // priority label; the set of valid values is not visible in this header
    USER_MESSAGE(const char* m, const char*p);
        // defined elsewhere;
        // presumably m -> message and p -> priority (confirm)
};
62 
63 struct HOST_USAGE {
64     int proc_type;
65     double gpu_usage;
66     double gpu_ram;
67     double avg_ncpus;
68     double max_ncpus;
69     double mem_usage;
70         // mem usage if specified by the plan class
71         // (overrides wu.rsc_memory_bound)
72     double projected_flops;
73         // the scheduler's best estimate of wu.rsc_fpops_est/elapsed_time.
74         // Taken from host_app_version elapsed time statistics if available,
75         // else on estimate provided by app_plan()
76     double peak_flops;
77         // stored in result.flops_estimate, and used for credit calculations
78     char cmdline[256];
79     char custom_coproc_type[256];
80         // if we're using a custom GPU type, its name
81         // TODO: get rid of PROC_TYPE_*, and this
82 
HOST_USAGEHOST_USAGE83     HOST_USAGE() {
84         proc_type = PROC_TYPE_CPU;
85         gpu_usage = 0;
86         gpu_ram = 0;
87         avg_ncpus = 1;
88         max_ncpus = 1;
89         mem_usage = 0;
90         projected_flops = 0;
91         peak_flops = 0;
92         strcpy(cmdline, "");
93         strcpy(custom_coproc_type, "");
94     }
sequential_appHOST_USAGE95     void sequential_app(double flops) {
96         proc_type = PROC_TYPE_CPU;
97         gpu_usage = 0;
98         gpu_ram = 0;
99         avg_ncpus = 1;
100         max_ncpus = 1;
101         mem_usage = 0;
102         if (flops <= 0) flops = 1e9;
103         projected_flops = flops;
104         peak_flops = flops;
105         strcpy(cmdline, "");
106     }
is_sequential_appHOST_USAGE107     inline bool is_sequential_app() {
108          if (proc_type != PROC_TYPE_CPU) return false;
109          if (avg_ncpus != 1) return false;
110          return true;
111     }
resource_typeHOST_USAGE112     inline int resource_type() {
113         switch (proc_type) {
114         case PROC_TYPE_NVIDIA_GPU: return ANON_PLATFORM_NVIDIA;
115         case PROC_TYPE_AMD_GPU: return ANON_PLATFORM_ATI;
116         case PROC_TYPE_INTEL_GPU: return ANON_PLATFORM_INTEL;
117         default: return ANON_PLATFORM_CPU;
118         }
119     }
uses_gpuHOST_USAGE120     inline bool uses_gpu() {
121         return (proc_type != PROC_TYPE_CPU);
122     }
123 };
124 
// a description of a sticky file on host, or a job input file
//
struct FILE_INFO {
    char name[256];
        // file name (NUL-terminated C string)
    double nbytes;
        // file size, presumably in bytes (per the field name)
    int status;
        // NOTE(review): meaning of the status codes isn't visible here
    bool sticky;

    int parse(XML_PARSER&);
        // defined elsewhere; fills in this struct from XML.
        // Return-value convention not visible here (presumably 0 = success)
};
135 
// a message from the host, as carried in a scheduler request
// (see SCHEDULER_REQUEST::msgs_from_host)
//
struct MSG_FROM_HOST_DESC {
    char variety[256];
        // NOTE(review): looks like a message type tag -- confirm
    std::string msg_text;
    int parse(XML_PARSER&);
        // defined elsewhere; return-value convention not visible here
};
141 
// an app version from an anonymous-platform client
// (starting with 6.11, ALL clients send these)
//
struct CLIENT_APP_VERSION {
    char app_name[256];
    char platform[256];
    int version_num;
    char plan_class[256];
    HOST_USAGE host_usage;
    double rsc_fpops_scale;
        // multiply wu.rsc_fpops_est and rsc_fpops_limit
        // by this amount when sent to client,
        // to reflect the discrepancy between how fast the client
        // thinks the app is versus how fast we think it is
    APP* app;
        // if NULL, this record is a place-holder,
        // used to preserve array indices

    int parse(XML_PARSER&);
        // defined elsewhere; return-value convention not visible here
};
162 
163 // keep track of the best app_version for each app for this host
164 //
165 struct BEST_APP_VERSION {
166     DB_ID_TYPE appid;
167     bool for_64b_jobs;
168         // maintain this separately for jobs that need > 2GB RAM,
169         // in which case we can't use 32-bit apps
170 
171     bool present;
172         // false means there's no usable version for this app
173 
174     CLIENT_APP_VERSION* cavp;
175         // populated if anonymous platform
176 
177     APP_VERSION* avp;
178         // populated otherwise
179 
180     HOST_USAGE host_usage;
181         // populated in either case
182 
183     bool reliable;
184     bool trusted;
185 
186     DB_HOST_APP_VERSION* host_app_version();
187         // get the HOST_APP_VERSION, if any
188 
BEST_APP_VERSIONBEST_APP_VERSION189     BEST_APP_VERSION() {
190         appid = 0;
191         for_64b_jobs = false;
192         present = false;
193         cavp = NULL;
194         avp = NULL;
195         reliable = false;
196         trusted = false;
197     }
198 };
199 
// a DB_RESULT extended with fields the scheduler needs at run time
//
struct SCHED_DB_RESULT : DB_RESULT {
    // the following used by the scheduler, but not stored in the DB
    //
    char wu_name[256];
    int units;      // used for granting credit by # of units processed
    int parse_from_client(XML_PARSER&);
        // defined elsewhere; parses a result as reported by a client
        // (per the name); return-value convention not visible here
    char platform_name[256];
    BEST_APP_VERSION bav;

    int write_to_client(FILE*);
        // defined elsewhere; writes this result to the given stream
};
211 
// the global preferences that the scheduler makes use of
//
struct GLOBAL_PREFS {
    double mod_time;
    double disk_max_used_gb;
    double disk_max_used_pct;
    double disk_min_free_gb;
    double work_buf_min_days;
    double ram_max_used_busy_frac;
    double ram_max_used_idle_frac;
    double max_ncpus_pct;

    void parse(const char* buf, const char* venue);
    void defaults();

    // work_buf_min_days converted from days to seconds
    //
    inline double work_buf_min() {
        const double seconds_per_day = 86400;
        return seconds_per_day * work_buf_min_days;
    }
};
228 
// holds GUI URL text
// NOTE(review): ownership and lifetime of "text" aren't visible
// in this header -- see the implementation of init()
//
struct GUI_URLS {
    char* text;
    void init();
        // defined elsewhere
    void get_gui_urls(USER& user, HOST& host, TEAM& team, char*, int len);
        // defined elsewhere; presumably writes at most len bytes
        // to the char* output buffer (confirm)
};
234 
// holds project-files text
// NOTE(review): ownership and lifetime of "text" aren't visible
// in this header -- see the implementation of init()
//
struct PROJECT_FILES {
    char* text;
    void init();
        // defined elsewhere
};
239 
// Represents a result from this project that the client has.
// The request message has a list of these.
// The reply message may include a list of those to be aborted
// or aborted if not started
//
struct OTHER_RESULT {
    char name[256];
    int app_version;    // index into CLIENT_APP_VERSION array
    char plan_class[64];
    bool have_plan_class;
        // presumably: whether plan_class was supplied -- confirm in parse()
    bool abort;
    bool abort_if_not_started;
    int reason;     // one of the ABORT_REASON_* codes defined below

    int parse(XML_PARSER&);
        // defined elsewhere; return-value convention not visible here
};
256 
// codes for OTHER_RESULT::reason
//
#define ABORT_REASON_NOT_FOUND      1
#define ABORT_REASON_WU_CANCELLED   2
#define ABORT_REASON_ASSIMILATED    3
#define ABORT_REASON_TIMED_OUT      4
261 
// a platform name reported by the client
// (see SCHEDULER_REQUEST::platform and ::alt_platforms)
//
struct CLIENT_PLATFORM {
    char name[256];
    int parse(XML_PARSER&);
        // defined elsewhere; return-value convention not visible here
};
266 
// a list of pointers to PLATFORM records
// NOTE(review): the pointers look non-owning (no destructor here) -- confirm
//
struct PLATFORM_LIST {
    std::vector<PLATFORM*> list;
};
270 
// A scheduler request message from a client, in parsed form.
// Note: this struct embeds several large char arrays
// (two of BLOB_SIZE, one of 4096 bytes).
//
struct SCHEDULER_REQUEST {
    char authenticator[256];
    CLIENT_PLATFORM platform;
    std::vector<CLIENT_PLATFORM> alt_platforms;
    PLATFORM_LIST platforms;
    char cross_project_id[256];
    DB_ID_TYPE hostid;                 // zero if first RPC
    int core_client_major_version;
    int core_client_minor_version;
    int core_client_release;
    int core_client_version;    // 10000*major + 100*minor + release
    int rpc_seqno;
    double work_req_seconds;
        // in "normalized CPU seconds" (see work_req.php)
    double cpu_req_secs;
    double cpu_req_instances;
    double resource_share_fraction;
        // this project's fraction of total resource share
    double rrs_fraction;
        // ... of runnable resource share
    double prrs_fraction;
        // ... of potentially runnable resource share
    double cpu_estimated_delay;
        // currently queued jobs saturate the CPU for this long;
        // used for crude deadline check
    double duration_correction_factor;
    double uptime;
    double previous_uptime;
    char global_prefs_xml[BLOB_SIZE];
    char working_global_prefs_xml[BLOB_SIZE];
    char code_sign_key[4096];
    bool dont_send_work;
    char client_brand[256];
        // as specified in client_brand.txt config file on client

    std::vector<CLIENT_APP_VERSION> client_app_versions;

    GLOBAL_PREFS global_prefs;
    char global_prefs_source_email_hash[MD5_LEN];

    HOST host;      // request message is parsed into here.
                    // does NOT contain the full host record.
    COPROCS coprocs;
    std::vector<SCHED_DB_RESULT> results;
        // completed results being reported
    bool results_truncated;
        // set if (to limit memory usage) we capped the size of "results".
        // In this case, don't resend lost results
        // since we don't know what was lost.
    std::vector<RESULT> file_xfer_results;
    std::vector<MSG_FROM_HOST_DESC> msgs_from_host;
    std::vector<FILE_INFO> file_infos;
        // sticky files reported by host

    // temps used by locality scheduling:
    std::vector<FILE_INFO> file_delete_candidates;
        // deletion candidates
    std::vector<FILE_INFO> files_not_needed;
        // files no longer needed

    std::vector<OTHER_RESULT> other_results;
        // in-progress results from this project
    std::vector<IP_RESULT> ip_results;
        // in-progress results from all projects
    bool have_other_results_list;
    bool have_ip_results_list;
    bool have_time_stats_log;
    bool client_cap_plan_class;
    int sandbox;
        // whether client uses account-based sandbox.  -1 = don't know
    int allow_multiple_clients;
        // whether client allows multiple clients per host, -1 = don't know
    bool using_weak_auth;
        // Request uses weak authenticator.
        // Don't modify user prefs or CPID
    int last_rpc_dayofyear;
    int current_rpc_dayofyear;
    std::string client_opaque;

    SCHEDULER_REQUEST();
    ~SCHEDULER_REQUEST(){};
    void clear();
    const char* parse(XML_PARSER&);
        // defined elsewhere.
        // NOTE(review): the const char* return looks like an error
        // message (NULL on success?) -- confirm in the implementation
    int write(FILE*); // write request info to file: not complete
};
356 
// keep track of bottleneck disk preference
//
// NOTE(review): the fields look like the disk amounts allowed by each of
// the three disk prefs in GLOBAL_PREFS (max used, max fraction, min free);
// units aren't visible here -- confirm where they are computed
//
struct DISK_LIMITS {
    double max_used;
    double max_frac;
    double min_free;
};
364 
365 // parsed version of project prefs that relate to scheduling
366 //
367 struct PROJECT_PREFS {
368     std::vector<APP_INFO> selected_apps;
369     bool dont_use_proc_type[NPROC_TYPES];
370     bool allow_non_selected_apps;
371     bool allow_beta_work;
372     int max_jobs_in_progress;
373     int max_cpus;
374 
375     void parse();
376 
PROJECT_PREFSPROJECT_PREFS377     PROJECT_PREFS() {
378         memset(&dont_use_proc_type, 0, sizeof(dont_use_proc_type));
379         allow_non_selected_apps = false;
380         allow_beta_work = false;
381         max_jobs_in_progress = 0;
382         max_cpus = 0;
383     }
384 };
385 
386 // summary of a client's request for work, and our response to it
387 // Note: this is zeroed out in SCHEDULER_REPLY constructor,
388 // so don't put any vectors here
389 //
390 struct WORK_REQ_BASE {
391     bool anonymous_platform;
392 
393     // the following defined if anonymous platform
394     //
395     bool client_has_apps_for_proc_type[NPROC_TYPES];
396 
397     // Flags used by old-style scheduling,
398     // while making multiple passes through the work array
399     //
400     bool infeasible_only;
401     bool reliable_only;
402     bool user_apps_only;
403     bool beta_only;
404     bool locality_sched_lite;
405         // for LSL apps, send only jobs where client has > 0 files
406 
407     bool resend_lost_results;
408         // this is set if the request is reporting a result
409         // that was previously reported.
410         // This is evidence that the earlier reply was not received
411         // by the client.  It may have contained results,
412         // so check and resend just in case.
413 
414     bool has_reliable_version;
415         // whether the host has a reliable app version
416 
417     int effective_ncpus;
418         // # of usable CPUs on host, taking prefs into account
419     int effective_ngpus;
420 
421     // 6.7+ clients send separate requests for different resource types:
422     //
423     double req_secs[NPROC_TYPES];
424         // instance-seconds requested
425     double req_instances[NPROC_TYPES];
426         // number of idle instances, use if possible
clear_reqWORK_REQ_BASE427     inline void clear_req(int proc_type) {
428         req_secs[proc_type] = 0;
429         req_instances[proc_type] = 0;
430     }
431 
432     // older clients send send a single number, the requested duration of jobs
433     //
434     double seconds_to_fill;
435 
436     // true if new-type request, which has resource-specific requests
437     //
438     bool rsc_spec_request;
439 
need_proc_typeWORK_REQ_BASE440     inline bool need_proc_type(int t) {
441         if (rsc_spec_request) {
442             return (req_secs[t]>0) || (req_instances[t]>0);
443         }
444         return seconds_to_fill > 0;
445     }
446 
447     double disk_available;
448     double ram, usable_ram;
449     double cpu_available_frac;
450     double gpu_available_frac;
451     int njobs_sent;
452 
453     // The following keep track of the "easiest" job that was rejected
454     // by EDF simulation.
455     // Any jobs harder than this can be rejected without doing the simulation.
456     //
457     double edf_reject_min_cpu;
458     int edf_reject_max_delay_bound;
459     bool have_edf_reject;
edf_rejectWORK_REQ_BASE460     void edf_reject(double cpu, int delay_bound) {
461         if (have_edf_reject) {
462             if (cpu < edf_reject_min_cpu) edf_reject_min_cpu = cpu;
463             if (delay_bound> edf_reject_max_delay_bound) edf_reject_max_delay_bound = delay_bound;
464         } else {
465             edf_reject_min_cpu = cpu;
466             edf_reject_max_delay_bound = delay_bound;
467             have_edf_reject = true;
468         }
469     }
edf_reject_testWORK_REQ_BASE470     bool edf_reject_test(double cpu, int delay_bound) {
471         if (!have_edf_reject) return false;
472         if (cpu < edf_reject_min_cpu) return false;
473         if (delay_bound > edf_reject_max_delay_bound) return false;
474         return true;
475     }
476 
477     RESOURCE disk;
478     RESOURCE mem;
479     RESOURCE speed;
480     RESOURCE bandwidth;
481 
482     // various reasons for not sending jobs (used to explain why)
483     //
484     bool no_allowed_apps_available;
485     bool hr_reject_temp;
486     bool hr_reject_perm;
487     bool outdated_client;
488     bool max_jobs_on_host_exceeded;
489     bool max_jobs_on_host_proc_type_exceeded[NPROC_TYPES];
490     bool no_jobs_available;     // project has no work right now
491     int max_jobs_per_rpc;
492 
max_jobs_exceededWORK_REQ_BASE493     bool max_jobs_exceeded() {
494         if (max_jobs_on_host_exceeded) return true;
495         for (int i=0; i<NPROC_TYPES; i++) {
496             if (max_jobs_on_host_proc_type_exceeded[i]) return true;
497         }
498         return false;
499     }
clearWORK_REQ_BASE500     void clear() {
501         memset(this, 0, sizeof(WORK_REQ_BASE));
502     }
503 
504 };
505 
// WORK_REQ_BASE plus members with non-trivial types
// (std::vector and PROJECT_PREFS, which itself holds a vector).
// Per the note on WORK_REQ_BASE, the base part gets zeroed,
// which is why vectors are kept out of it.
//
struct WORK_REQ : public WORK_REQ_BASE {
    PROJECT_PREFS project_prefs;
    std::vector<USER_MESSAGE> no_work_messages;
    std::vector<BEST_APP_VERSION*> best_app_versions;
        // NOTE(review): raw pointers; the empty destructor below
        // doesn't free them, so ownership must be handled elsewhere -- confirm
    std::vector<DB_HOST_APP_VERSION> host_app_versions;
    std::vector<DB_HOST_APP_VERSION> host_app_versions_orig;

    void get_job_limits();
    void add_no_work_message(const char*);

    ~WORK_REQ() {}
};
518 
// The reply to a scheduler request.
//
// NOTE: if any field requires initialization,
// you must do it in the constructor.  Nothing is zeroed by default.
//
struct SCHEDULER_REPLY {
    WORK_REQ wreq;
    DISK_LIMITS disk_limits;
    double request_delay;       // don't request again until this time elapses
    std::vector<USER_MESSAGE> messages;
    DB_ID_TYPE hostid;
        // nonzero only if a new host record was created.
        // this tells client to reset rpc_seqno
    int lockfile_fd; // file descriptor of lockfile, or -1 if no lock.
    bool send_global_prefs;
    bool nucleus_only;          // send only message
    USER user;
    char email_hash[MD5_LEN];
    HOST host;                  // after validation, contains full host rec
    TEAM team;
    std::vector<APP> apps;
    std::vector<APP_VERSION> app_versions;
    std::vector<WORKUNIT>wus;
    std::vector<SCHED_DB_RESULT>results;
    std::vector<std::string>result_acks;
    std::vector<std::string>result_aborts;
    std::vector<std::string>result_abort_if_not_starteds;
    std::vector<MSG_TO_HOST>msgs_to_host;
    std::vector<FILE_INFO>file_deletes;
    std::vector<std::string> file_transfer_requests;
    char code_sign_key[4096];
    char code_sign_key_signature[4096];
    bool send_msg_ack;
    bool project_is_down;
    std::vector<APP_VERSION>old_app_versions;
        // superseded app versions that we consider using because of
        // homogeneous app version.

    SCHEDULER_REPLY();
    ~SCHEDULER_REPLY(){};
    int write(FILE*, SCHEDULER_REQUEST&);
    void insert_app_unique(APP&);
    void insert_app_version_unique(APP_VERSION&);
    void insert_workunit_unique(WORKUNIT&);
    void insert_result(SCHED_DB_RESULT&);
    void insert_message(const char* msg, const char* prio);
    void insert_message(USER_MESSAGE&);
    void set_delay(double);
};
566 
// Globals and a helper defined elsewhere in the scheduler.
// NOTE(review): presumably the pointers refer to the request, reply and
// work request of the RPC being handled -- confirm where they are defined.
//
extern SCHEDULER_REQUEST* g_request;
extern SCHEDULER_REPLY* g_reply;
extern WORK_REQ* g_wreq;
extern double capped_host_fpops();
571 
add_no_work_message(const char * m)572 static inline void add_no_work_message(const char* m) {
573     g_wreq->add_no_work_message(m);
574 }
575 
// Functions defined elsewhere in the scheduler.
// NOTE(review): the char* arguments of get_weak_auth() and get_rss_auth()
// look like caller-supplied output buffers; the required size isn't
// visible here -- confirm in the implementation.
//
extern void get_weak_auth(USER&, char*);
extern void get_rss_auth(USER&, char*);
extern void read_host_app_versions();
extern DB_HOST_APP_VERSION* get_host_app_version(DB_ID_TYPE gavid);
extern void write_host_app_versions();

extern DB_HOST_APP_VERSION* gavid_to_havp(DB_ID_TYPE gavid);
extern DB_HOST_APP_VERSION* quota_exceeded_version();
584 
is_64b_platform(const char * name)585 inline bool is_64b_platform(const char* name) {
586     return (strstr(name, "64") != NULL);
587 }
588 
// defined elsewhere.
// NOTE(review): presumably the fraction of time the resource used by
// the given app version is available on this host -- confirm
//
extern double available_frac(BEST_APP_VERSION&);
590 
591 #endif
592