1 /*
2 * Home page of code is: https://www.smartmontools.org
3 *
4 * Copyright (C) 2002-11 Bruce Allen
5 * Copyright (C) 2008-20 Christian Franke
6 * Copyright (C) 2000 Michael Cornwell <cornwell@acm.org>
7 * Copyright (C) 2008 Oliver Bock <brevilo@users.sourceforge.net>
8 *
9 * SPDX-License-Identifier: GPL-2.0-or-later
10 */
11
12 #include "config.h"
13 #define __STDC_FORMAT_MACROS 1 // enable PRI* for C++
14
15 // unconditionally included files
16 #include <inttypes.h>
17 #include <stdio.h>
18 #include <sys/types.h>
19 #include <sys/stat.h> // umask
20 #include <signal.h>
21 #include <fcntl.h>
22 #include <string.h>
23 #include <syslog.h>
24 #include <stdarg.h>
25 #include <stdlib.h>
26 #include <errno.h>
27 #include <time.h>
28 #include <limits.h>
29 #include <getopt.h>
30
31 #include <algorithm> // std::replace()
32 #include <map>
33 #include <stdexcept>
34 #include <string>
35 #include <vector>
36
37 // conditionally included files
38 #ifndef _WIN32
39 #include <sys/wait.h>
40 #endif
41 #ifdef HAVE_UNISTD_H
42 #include <unistd.h>
43 #endif
44
45 #ifdef _WIN32
46 #include "os_win32/popen.h" // popen/pclose()
47 #ifdef _MSC_VER
48 #pragma warning(disable:4761) // "conversion supplied"
49 typedef unsigned short mode_t;
50 typedef int pid_t;
51 #endif
52 #include <io.h> // umask()
53 #include <process.h> // getpid()
54 #endif // _WIN32
55
56 #ifdef __CYGWIN__
57 #include <io.h> // setmode()
58 #endif // __CYGWIN__
59
60 #ifdef HAVE_LIBCAP_NG
61 #include <cap-ng.h>
62 #endif // LIBCAP_NG
63
64 #ifdef HAVE_LIBSYSTEMD
65 #include <systemd/sd-daemon.h>
66 #endif // HAVE_LIBSYSTEMD
67
68 // locally included files
69 #include "atacmds.h"
70 #include "dev_interface.h"
71 #include "knowndrives.h"
72 #include "scsicmds.h"
73 #include "nvmecmds.h"
74 #include "utility.h"
75
#ifdef _WIN32
// fork()/signal()/initd simulation for native Windows
#include "os_win32/daemon_win32.h" // daemon_main/detach/signal()
// Redirect the POSIX names used in this file to the daemon_*() emulation
#define strsignal daemon_strsignal
#define sleep daemon_sleep
// SIGQUIT does not exist, CONTROL-Break signals SIGBREAK.
#define SIGQUIT SIGBREAK
// Name of the key combination which raises the signal mapped to SIGQUIT
#define SIGQUIT_KEYNAME "CONTROL-Break"
#else // _WIN32
// Name of the key combination for SIGQUIT (the string is: CONTROL-\)
#define SIGQUIT_KEYNAME "CONTROL-\\"
#endif // _WIN32
87
// Version control ID string of this file, concatenated with the ID string
// CONFIG_H_CVSID (presumably provided by config.h -- confirm).
const char * smartd_cpp_cvsid = "$Id: smartd.cpp 5118 2020-11-23 18:25:16Z chrfranke $"
  CONFIG_H_CVSID;
90
// Pointer to a signal handler function 'void handler(int sig)'.
// Declared inside extern "C" so that the function type has C linkage.
extern "C" {
  typedef void (*signal_handler_type)(int);
}
94
set_signal_if_not_ignored(int sig,signal_handler_type handler)95 static void set_signal_if_not_ignored(int sig, signal_handler_type handler)
96 {
97 #if defined(_WIN32)
98 // signal() emulation
99 daemon_signal(sig, handler);
100
101 #elif defined(HAVE_SIGACTION)
102 // SVr4, POSIX.1-2001, POSIX.1-2008
103 struct sigaction sa;
104 sa.sa_handler = SIG_DFL;
105 sigaction(sig, (struct sigaction *)0, &sa);
106 if (sa.sa_handler == SIG_IGN)
107 return;
108
109 memset(&sa, 0, sizeof(sa));
110 sa.sa_handler = handler;
111 sa.sa_flags = SA_RESTART; // BSD signal() semantics
112 sigaction(sig, &sa, (struct sigaction *)0);
113
114 #elif defined(HAVE_SIGSET)
115 // SVr4, POSIX.1-2001, obsoleted in POSIX.1-2008
116 if (sigset(sig, handler) == SIG_IGN)
117 sigset(sig, SIG_IGN);
118
119 #else
120 // POSIX.1-2001, POSIX.1-2008, C89, C99, undefined semantics.
121 // Important: BSD semantics is required. Traditional signal()
122 // resets the handler to SIG_DFL after the first signal is caught.
123 if (signal(sig, handler) == SIG_IGN)
124 signal(sig, SIG_IGN);
125 #endif
126 }
127
// Make the names from namespace smartmontools visible in this file
using namespace smartmontools;

// smartd exit codes
// (the values are not contiguous, 7, 9 and 11..15 are not defined here)
#define EXIT_BADCMD 1 // command line did not parse
#define EXIT_BADCONF 2 // syntax error in config file
#define EXIT_STARTUP 3 // problem forking daemon
#define EXIT_PID 4 // problem creating pid file
#define EXIT_NOCONF 5 // config file does not exist
#define EXIT_READCONF 6 // config file exists but cannot be read

#define EXIT_NOMEM 8 // out of memory
#define EXIT_BADCODE 10 // internal error - should NEVER happen

#define EXIT_BADDEV 16 // we can't monitor this device
#define EXIT_NODEV 17 // no devices to monitor

#define EXIT_SIGNAL 254 // abort on signal
145
146
// command-line: 1=debug mode, 2=print presets
static unsigned char debugmode = 0;

// command-line: how long to sleep between checks
// (unit is presumably seconds -- confirm against the option parser)
#define CHECKTIME 1800
static int checktime=CHECKTIME;

// command-line: name of PID file (empty for no pid file)
static std::string pid_file;

// command-line: path prefix of persistent state file, empty if no persistence.
// The build-time default SMARTMONTOOLS_SAVESTATES is used if it is defined.
static std::string state_path_prefix
#ifdef SMARTMONTOOLS_SAVESTATES
  = SMARTMONTOOLS_SAVESTATES
#endif
  ;

// command-line: path prefix of attribute log file, empty if no logs.
// The build-time default SMARTMONTOOLS_ATTRIBUTELOG is used if it is defined.
static std::string attrlog_path_prefix
#ifdef SMARTMONTOOLS_ATTRIBUTELOG
  = SMARTMONTOOLS_ATTRIBUTELOG
#endif
  ;

// configuration file name
static const char * configfile;
// configuration file "name" if read from stdin
static const char * const configfile_stdin = "<stdin>";
// path of alternate configuration file
static std::string configfile_alt;

// warning script file
static std::string warning_script;

// command-line: when should we exit?
enum quit_t {
  QUIT_NODEV, QUIT_NODEVSTARTUP, QUIT_NEVER, QUIT_ONECHECK,
  QUIT_SHOWTESTS, QUIT_ERRORS
};
// Default is QUIT_NODEV
static quit_t quit = QUIT_NODEV;

// command-line; this is the default syslog(3) log facility to use.
static int facility=LOG_DAEMON;

#ifndef _WIN32
// command-line: fork into background?
static bool do_fork=true;
#endif

// TODO: This smartctl only variable is also used in some os_*.cpp
// (external linkage is required for that reason)
unsigned char failuretest_permissive = 0;
198
// Flags written by the signal handlers of this file.
// NOTE(review): the type guaranteed to be safe for writes from a signal
// handler is 'volatile sig_atomic_t', 'volatile int' works on common
// platforms -- confirm before changing.

// set to one if we catch a USR1 (check devices now)
static volatile int caughtsigUSR1=0;

#ifdef _WIN32
// set to one if we catch a USR2 (toggle debug mode)
static volatile int caughtsigUSR2=0;
#endif

// set to one if we catch a HUP (reload config file). In debug mode,
// set to two, if we catch INT (also reload config file).
static volatile int caughtsigHUP=0;

// set to signal value if we catch INT, QUIT, or TERM
// (only the first signal is recorded, see sighandler())
static volatile int caughtsigEXIT=0;
213
// This function prints either to stdout or to the syslog as needed.
// 'priority' is a syslog(3) priority like LOG_INFO or LOG_CRIT,
// 'fmt' and the following arguments are a printf()-style format.
// The attribute macro marks argument 2 as format and 3 as first argument.
static void PrintOut(int priority, const char *fmt, ...)
  __attribute_format_printf(2, 3);
217
218 #ifdef HAVE_LIBSYSTEMD
// systemd notify support

// True if status notifications are sent, set by notify_init()
// if the environment variable NOTIFY_SOCKET exists.
static bool notify_enabled = false;
222
notify_init()223 static inline void notify_init()
224 {
225 if (!getenv("NOTIFY_SOCKET"))
226 return;
227 notify_enabled = true;
228 }
229
notify_post_init()230 static inline bool notify_post_init()
231 {
232 if (!notify_enabled)
233 return true;
234 if (do_fork) {
235 PrintOut(LOG_CRIT, "Option -n (--no-fork) is required if 'Type=notify' is set.\n");
236 return false;
237 }
238 return true;
239 }
240
notify_msg(const char * msg,bool ready=false)241 static void notify_msg(const char * msg, bool ready = false)
242 {
243 if (!notify_enabled)
244 return;
245 if (debugmode) {
246 pout("sd_notify(0, \"%sSTATUS=%s\")\n", (ready ? "READY=1\\n" : ""), msg);
247 return;
248 }
249 sd_notifyf(0, "%sSTATUS=%s", (ready ? "READY=1\n" : ""), msg);
250 }
251
notify_check(int numdev)252 static void notify_check(int numdev)
253 {
254 if (!notify_enabled)
255 return;
256 char msg[32];
257 snprintf(msg, sizeof(msg), "Checking %d device%s ...",
258 numdev, (numdev != 1 ? "s" : ""));
259 notify_msg(msg);
260 }
261
notify_wait(time_t wakeuptime,int numdev)262 static void notify_wait(time_t wakeuptime, int numdev)
263 {
264 if (!notify_enabled)
265 return;
266 char ts[16] = ""; struct tm tmbuf;
267 strftime(ts, sizeof(ts), "%H:%M:%S", time_to_tm_local(&tmbuf, wakeuptime));
268 char msg[64];
269 snprintf(msg, sizeof(msg), "Next check of %d device%s will start at %s",
270 numdev, (numdev != 1 ? "s" : ""), ts);
271 static bool ready = true; // first call notifies READY=1
272 notify_msg(msg, ready);
273 ready = false;
274 }
275
notify_exit(int status)276 static void notify_exit(int status)
277 {
278 if (!notify_enabled)
279 return;
280 const char * msg;
281 switch (status) {
282 case 0: msg = "Exiting ..."; break;
283 case EXIT_BADCMD: msg = "Error in command line (see SYSLOG)"; break;
284 case EXIT_BADCONF: case EXIT_NOCONF:
285 case EXIT_READCONF: msg = "Error in config file (see SYSLOG)"; break;
286 case EXIT_BADDEV: msg = "Unable to register a device (see SYSLOG)"; break;
287 case EXIT_NODEV: msg = "No devices to monitor"; break;
288 default: msg = "Error (see SYSLOG)"; break;
289 }
290 notify_msg(msg);
291 }
292
293 #else // HAVE_LIBSYSTEMD
294 // No systemd notify support
295
notify_post_init()296 static inline bool notify_post_init()
297 {
298 #ifdef __linux__
299 if (getenv("NOTIFY_SOCKET")) {
300 PrintOut(LOG_CRIT, "This version of smartd was build without 'Type=notify' support.\n");
301 return false;
302 }
303 #endif
304 return true;
305 }
306
// Empty replacements used if notify support is not compiled in.
// All arguments are ignored.

static inline void notify_init()
{
}

static inline void notify_msg(const char *)
{
}

static inline void notify_check(int)
{
}

static inline void notify_wait(time_t, int)
{
}

static inline void notify_exit(int)
{
}
312
313 #endif // HAVE_LIBSYSTEMD
314
// Attribute monitoring flags.
// Each flag is a single bit so that flags can be combined in one byte.
// See monitor_attr_flags below.
enum {
  MONITOR_IGN_FAILUSE = 1 << 0, // 0x01
  MONITOR_IGNORE      = 1 << 1, // 0x02
  MONITOR_RAW_PRINT   = 1 << 2, // 0x04
  MONITOR_RAW         = 1 << 3, // 0x08
  MONITOR_AS_CRIT     = 1 << 4, // 0x10
  MONITOR_RAW_AS_CRIT = 1 << 5  // 0x20
};
325
// Array of flags for each attribute.
// Holds one byte of flag bits per attribute ID.  Only IDs 1..255 are
// accepted: set() ignores any other ID and is_set() returns false for it.
class attribute_flags
{
public:
  // Start with all flags cleared.
  attribute_flags()
    {
      for (unsigned i = 0; i < sizeof(m_flags); i++)
        m_flags[i] = 0;
    }

  // Return true if at least one bit of 'flag' is set for attribute 'id'.
  bool is_set(int id, unsigned char flag) const
    {
      if (!valid_id(id))
        return false;
      return ((m_flags[id] & flag) != 0);
    }

  // Add the bits of 'flags' to the flags of attribute 'id'.
  void set(int id, unsigned char flags)
    {
      if (valid_id(id))
        m_flags[id] |= flags;
    }

private:
  unsigned char m_flags[256];

  // True if 'id' is a usable index, ID 0 is excluded.
  bool valid_id(int id) const
    { return (0 < id && id < (int)sizeof(m_flags)); }
};
345
346
/// Configuration data for a device. Read from smartd.conf.
/// Supports copy & assignment and is compatible with STL containers.
/// The constructor clears all scalar members (0 or false).  Note that the
/// member order below is also the order of the constructor's initializer list.
struct dev_config
{
  int lineno;                             // Line number of entry in file
  std::string name;                       // Device name (with optional extra info)
  std::string dev_name;                   // Device name (plain, for SMARTD_DEVICE variable)
  std::string dev_type;                   // Device type argument from -d directive, empty if none
  std::string dev_idinfo;                 // Device identify info for warning emails
  std::string state_file;                 // Path of the persistent state file, empty if none
  std::string attrlog_file;               // Path of the persistent attrlog file, empty if none
  bool ignore;                            // Ignore this entry
  bool id_is_unique;                      // True if dev_idinfo is unique (includes S/N or WWN)
  bool smartcheck;                        // Check SMART status
  bool usagefailed;                       // Check for failed Usage Attributes
  bool prefail;                           // Track changes in Prefail Attributes
  bool usage;                             // Track changes in Usage Attributes
  bool selftest;                          // Monitor number of selftest errors
  bool errorlog;                          // Monitor number of ATA errors
  bool xerrorlog;                         // Monitor number of ATA errors (Extended Comprehensive error log)
  bool offlinests;                        // Monitor changes in offline data collection status
  bool offlinests_ns;                     // Disable auto standby if in progress
  bool selfteststs;                       // Monitor changes in self-test execution status
  bool selfteststs_ns;                    // Disable auto standby if in progress
  bool permissive;                        // Ignore failed SMART commands
  char autosave;                          // 1=disable, 2=enable Autosave Attributes
  char autoofflinetest;                   // 1=disable, 2=enable Auto Offline Test
  firmwarebug_defs firmwarebugs;          // -F directives from drivedb or smartd.conf
  bool ignorepresets;                     // Ignore database of -v options
  bool showpresets;                       // Show database entry for this device
  bool removable;                         // Device may disappear (not be present)
  char powermode;                         // skip check, if disk in idle or standby mode
  bool powerquiet;                        // skip powermode 'skipping checks' message
  int powerskipmax;                       // how many times a check can be skipped
  unsigned char tempdiff;                 // Track Temperature changes >= this limit
  unsigned char tempinfo, tempcrit;       // Track Temperatures >= these limits as LOG_INFO, LOG_CRIT+mail
  regular_expression test_regex;          // Regex for scheduled testing
  unsigned test_offset_factor;            // Factor for staggering of scheduled tests

  // Configuration of email warning messages
  std::string emailcmdline;               // script to execute, empty if no messages
  std::string emailaddress;               // email address, or empty
  unsigned char emailfreq;                // Emails once (1) daily (2) diminishing (3)
  bool emailtest;                         // Send test email?

  // ATA ONLY
  int dev_rpm;                            // rotation rate, 0 = unknown, 1 = SSD, >1 = HDD
  int set_aam;                            // disable(-1), enable(1..255->0..254) Automatic Acoustic Management
  int set_apm;                            // disable(-1), enable(2..255->1..254) Advanced Power Management
  int set_lookahead;                      // disable(-1), enable(1) read look-ahead
  int set_standby;                        // set(1..255->0..254) standby timer
  bool set_security_freeze;               // Freeze ATA security
  int set_wcache;                         // disable(-1), enable(1) write cache
  int set_dsn;                            // disable(0x2), enable(0x1) DSN

  bool sct_erc_set;                       // set SCT ERC to:
  unsigned short sct_erc_readtime;        // ERC read time (deciseconds)
  unsigned short sct_erc_writetime;       // ERC write time (deciseconds)

  unsigned char curr_pending_id;          // ID of current pending sector count, 0 if none
  unsigned char offl_pending_id;          // ID of offline uncorrectable sector count, 0 if none
  bool curr_pending_incr, offl_pending_incr; // True if current/offline pending values increase
  bool curr_pending_set,  offl_pending_set;  // True if '-C', '-U' set in smartd.conf

  attribute_flags monitor_attr_flags;     // MONITOR_* flags for each attribute

  ata_vendor_attr_defs attribute_defs;    // -v options

  // Clears all scalar members, see definition below the struct
  dev_config();
};
417
dev_config()418 dev_config::dev_config()
419 : lineno(0),
420 ignore(false),
421 id_is_unique(false),
422 smartcheck(false),
423 usagefailed(false),
424 prefail(false),
425 usage(false),
426 selftest(false),
427 errorlog(false),
428 xerrorlog(false),
429 offlinests(false), offlinests_ns(false),
430 selfteststs(false), selfteststs_ns(false),
431 permissive(false),
432 autosave(0),
433 autoofflinetest(0),
434 ignorepresets(false),
435 showpresets(false),
436 removable(false),
437 powermode(0),
438 powerquiet(false),
439 powerskipmax(0),
440 tempdiff(0),
441 tempinfo(0), tempcrit(0),
442 test_offset_factor(0),
443 emailfreq(0),
444 emailtest(false),
445 dev_rpm(0),
446 set_aam(0), set_apm(0),
447 set_lookahead(0),
448 set_standby(0),
449 set_security_freeze(false),
450 set_wcache(0), set_dsn(0),
451 sct_erc_set(false),
452 sct_erc_readtime(0), sct_erc_writetime(0),
453 curr_pending_id(0), offl_pending_id(0),
454 curr_pending_incr(false), offl_pending_incr(false),
455 curr_pending_set(false), offl_pending_set(false)
456 {
457 }
458
459
// Number of allowed mail message types
// (valid mail type indexes are 0 .. SMARTD_NMAIL-1)
static const int SMARTD_NMAIL = 13;
// Type for '-M test' mails (state not persistent)
static const int MAILTYPE_TEST = 0;
// TODO: Add const or enum for all mail types.
465
// Bookkeeping of the warning mails sent for one mail type.
struct mailinfo
{
  int logged;       // number of times an email has been sent
  time_t firstsent; // time first email was sent, as defined by time(2)
  time_t lastsent;  // time last email was sent, as defined by time(2)

  // Start with "no mail sent yet"
  mailinfo()
    : logged(0),
      firstsent(0),
      lastsent(0)
    { }
};
474
/// Persistent state data for a device.
/// These values are read from and written to the state file by
/// read_dev_state() and write_dev_state().  The SCSI counters are
/// not part of the state file, they are written to the attribute log only.
struct persistent_dev_state
{
  unsigned char tempmin, tempmax;         // Min/Max Temperatures

  unsigned char selflogcount;             // total number of self-test errors
  unsigned short selfloghour;             // lifetime hours of last self-test error

  time_t scheduled_test_next_check;       // Time of next check for scheduled self-tests

  uint64_t selective_test_last_start;     // Start LBA of last scheduled selective self-test
  uint64_t selective_test_last_end;       // End LBA of last scheduled selective self-test

  mailinfo maillog[SMARTD_NMAIL];         // log info on when mail sent

  // ATA ONLY
  int ataerrorcount;                      // Total number of ATA errors

  // Persistent part of ata_smart_values:
  struct ata_attribute {
    unsigned char id;
    unsigned char val;
    unsigned char worst; // Byte needed for 'raw64' attribute only.
    uint64_t raw;        // Raw value, assembled from 6 bytes (see update_persistent_state())
    unsigned char resvd;

    // All fields start with 0, id 0 marks an unused entry
    ata_attribute() : id(0), val(0), worst(0), raw(0), resvd(0) { }
  };
  ata_attribute ata_attributes[NUMBER_ATA_SMART_ATTRIBUTES];

  // SCSI ONLY

  // One error counter page and a flag which is nonzero if it is valid
  struct scsi_error_counter_t {
    struct scsiErrorCounter errCounter;
    unsigned char found;
    scsi_error_counter_t() : found(0)
      { memset(&errCounter, 0, sizeof(errCounter)); }
  };
  // Index 0, 1, 2 is written as "read", "write", "verify" by write_dev_attrlog()
  scsi_error_counter_t scsi_error_counters[3];

  // Non-medium error counter and a flag which is nonzero if it is valid
  struct scsi_nonmedium_error_t {
    struct scsiNonMediumError nme;
    unsigned char found;
    scsi_nonmedium_error_t() : found(0)
      { memset(&nme, 0, sizeof(nme)); }
  };
  scsi_nonmedium_error_t scsi_nonmedium_error;

  // NVMe only
  uint64_t nvme_err_log_entries;

  // Clears all scalar members, see definition below the struct
  persistent_dev_state();
};
528
persistent_dev_state()529 persistent_dev_state::persistent_dev_state()
530 : tempmin(0), tempmax(0),
531 selflogcount(0),
532 selfloghour(0),
533 scheduled_test_next_check(0),
534 selective_test_last_start(0),
535 selective_test_last_end(0),
536 ataerrorcount(0),
537 nvme_err_log_entries(0)
538 {
539 }
540
/// Non-persistent state data for a device.
/// The constructor clears all members including the SMART data buffers.
struct temp_dev_state
{
  bool must_write;                        // true if persistent part should be written

  bool not_cap_offline;                   // true == not capable of offline testing
  bool not_cap_conveyance;                // same for the other self-test types
  bool not_cap_short;
  bool not_cap_long;
  bool not_cap_selective;

  unsigned char temperature;              // last recorded Temperature (in Celsius)
  time_t tempmin_delay;                   // time where Min Temperature tracking will start

  bool removed;                           // true if open() failed for removable device

  bool powermodefail;                     // true if power mode check failed
  int powerskipcnt;                       // Number of checks skipped due to idle or standby mode
  int lastpowermodeskipped;               // the last power mode that was skipped

  bool attrlog_dirty;                     // true if persistent part has new attr values that
                                          // need to be written to attrlog

  // SCSI ONLY
  unsigned char SmartPageSupported;       // has log sense IE page (0x2f)
  unsigned char TempPageSupported;        // has log sense temperature page (0xd)
  unsigned char ReadECounterPageSupported;
  unsigned char WriteECounterPageSupported;
  unsigned char VerifyECounterPageSupported;
  unsigned char NonMediumErrorPageSupported;
  unsigned char SuppressReport;           // minimize nuisance reports
  unsigned char modese_len;               // mode sense/select cmd len: 0 (don't
                                          // know yet) 6 or 10
  // ATA ONLY
  uint64_t num_sectors;                   // Number of sectors
  ata_smart_values smartval;              // SMART data
  ata_smart_thresholds_pvt smartthres;    // SMART thresholds
  bool offline_started;                   // true if offline data collection was started
  bool selftest_started;                  // true if self-test was started

  // Clears all members, see definition below the struct
  temp_dev_state();
};
583
temp_dev_state()584 temp_dev_state::temp_dev_state()
585 : must_write(false),
586 not_cap_offline(false),
587 not_cap_conveyance(false),
588 not_cap_short(false),
589 not_cap_long(false),
590 not_cap_selective(false),
591 temperature(0),
592 tempmin_delay(0),
593 removed(false),
594 powermodefail(false),
595 powerskipcnt(0),
596 lastpowermodeskipped(0),
597 attrlog_dirty(false),
598 SmartPageSupported(false),
599 TempPageSupported(false),
600 ReadECounterPageSupported(false),
601 WriteECounterPageSupported(false),
602 VerifyECounterPageSupported(false),
603 NonMediumErrorPageSupported(false),
604 SuppressReport(false),
605 modese_len(0),
606 num_sectors(0),
607 offline_started(false),
608 selftest_started(false)
609 {
610 memset(&smartval, 0, sizeof(smartval));
611 memset(&smartthres, 0, sizeof(smartthres));
612 }
613
/// Runtime state data for a device.
/// Combines the persistent and the non-persistent part.
struct dev_state
: public persistent_dev_state,
  public temp_dev_state
{
  // Copy the ATA attributes from 'smartval' to 'ata_attributes'
  void update_persistent_state();
  // Copy the ATA attributes from 'ata_attributes' to 'smartval'
  void update_temp_state();
};
622
/// Container for configuration info for each device.
typedef std::vector<dev_config> dev_config_vector;

/// Container for state info for each device.
/// (write_all_dev_states() accesses both containers with the same index)
typedef std::vector<dev_state> dev_state_vector;
628
629 // Copy ATA attributes to persistent state.
update_persistent_state()630 void dev_state::update_persistent_state()
631 {
632 for (int i = 0; i < NUMBER_ATA_SMART_ATTRIBUTES; i++) {
633 const ata_smart_attribute & ta = smartval.vendor_attributes[i];
634 ata_attribute & pa = ata_attributes[i];
635 pa.id = ta.id;
636 if (ta.id == 0) {
637 pa.val = pa.worst = 0; pa.raw = 0;
638 continue;
639 }
640 pa.val = ta.current;
641 pa.worst = ta.worst;
642 pa.raw = ta.raw[0]
643 | ( ta.raw[1] << 8)
644 | ( ta.raw[2] << 16)
645 | ((uint64_t)ta.raw[3] << 24)
646 | ((uint64_t)ta.raw[4] << 32)
647 | ((uint64_t)ta.raw[5] << 40);
648 pa.resvd = ta.reserv;
649 }
650 }
651
652 // Copy ATA from persistent to temp state.
update_temp_state()653 void dev_state::update_temp_state()
654 {
655 for (int i = 0; i < NUMBER_ATA_SMART_ATTRIBUTES; i++) {
656 const ata_attribute & pa = ata_attributes[i];
657 ata_smart_attribute & ta = smartval.vendor_attributes[i];
658 ta.id = pa.id;
659 if (pa.id == 0) {
660 ta.current = ta.worst = 0;
661 memset(ta.raw, 0, sizeof(ta.raw));
662 continue;
663 }
664 ta.current = pa.val;
665 ta.worst = pa.worst;
666 ta.raw[0] = (unsigned char) pa.raw;
667 ta.raw[1] = (unsigned char)(pa.raw >> 8);
668 ta.raw[2] = (unsigned char)(pa.raw >> 16);
669 ta.raw[3] = (unsigned char)(pa.raw >> 24);
670 ta.raw[4] = (unsigned char)(pa.raw >> 32);
671 ta.raw[5] = (unsigned char)(pa.raw >> 40);
672 ta.reserv = pa.resvd;
673 }
674 }
675
// Parse a line from a state file.
// The line must have the form "NAME = NUMBER" where NAME is one of the
// names in the regular expression below and NUMBER is a decimal number.
// On success, the number is stored in the matching member of 'state'
// (truncated by the cast to the member type) and true is returned.
// Returns false if the line does not match or an index is out of range.
// Lines for the mail type MAILTYPE_TEST are accepted but not stored.
static bool parse_dev_state_line(const char * line, persistent_dev_state & state)
{
  // The comments on the right are the numbers of the regex groups
  // which are used as index into match[] below.
  static const regular_expression regex(
    "^ *"
     "((temperature-min)" // (1 (2)
     "|(temperature-max)" // (3)
     "|(self-test-errors)" // (4)
     "|(self-test-last-err-hour)" // (5)
     "|(scheduled-test-next-check)" // (6)
     "|(selective-test-last-start)" // (7)
     "|(selective-test-last-end)" // (8)
     "|(ata-error-count)"  // (9)
     "|(mail\\.([0-9]+)\\." // (10 (11)
       "((count)" // (12 (13)
       "|(first-sent-time)" // (14)
       "|(last-sent-time)" // (15)
       ")" // 12)
      ")" // 10)
     "|(ata-smart-attribute\\.([0-9]+)\\." // (16 (17)
       "((id)" // (18 (19)
       "|(val)" // (20)
       "|(worst)" // (21)
       "|(raw)" // (22)
       "|(resvd)" // (23)
       ")" // 18)
      ")" // 16)
     "|(nvme-err-log-entries)" // (24)
     ")" // 1)
     " *= *([0-9]+)[ \n]*$" // (25)
  );

  // match[0] is the whole match, match[1..25] are the groups
  const int nmatch = 1+25;
  regular_expression::match_range match[nmatch];
  if (!regex.execute(line, nmatch, match))
    return false;
  // The last group (25) is the number
  if (match[nmatch-1].rm_so < 0)
    return false;

  uint64_t val = strtoull(line + match[nmatch-1].rm_so, (char **)0, 10);

  // 'm' walks through the group numbers.  Each test below advances 'm'
  // even if it fails, so the order of the tests must not be changed.
  int m = 1;
  if (match[++m].rm_so >= 0) // m = 2
    state.tempmin = (unsigned char)val;
  else if (match[++m].rm_so >= 0) // m = 3
    state.tempmax = (unsigned char)val;
  else if (match[++m].rm_so >= 0) // m = 4
    state.selflogcount = (unsigned char)val;
  else if (match[++m].rm_so >= 0) // m = 5
    state.selfloghour = (unsigned short)val;
  else if (match[++m].rm_so >= 0) // m = 6
    state.scheduled_test_next_check = (time_t)val;
  else if (match[++m].rm_so >= 0) // m = 7
    state.selective_test_last_start = val;
  else if (match[++m].rm_so >= 0) // m = 8
    state.selective_test_last_end = val;
  else if (match[++m].rm_so >= 0) // m = 9
    state.ataerrorcount = (int)val;
  else if (match[m+=2].rm_so >= 0) { // m = 11: mail type number
    int i = atoi(line+match[m].rm_so);
    if (!(0 <= i && i < SMARTD_NMAIL))
      return false;
    if (i == MAILTYPE_TEST) // Don't suppress test mails
      return true;
    if (match[m+=2].rm_so >= 0) // m = 13
      state.maillog[i].logged = (int)val;
    else if (match[++m].rm_so >= 0) // m = 14
      state.maillog[i].firstsent = (time_t)val;
    else if (match[++m].rm_so >= 0) // m = 15
      state.maillog[i].lastsent = (time_t)val;
    else
      return false;
  }
  else if (match[m+=5+1].rm_so >= 0) { // m = 17: attribute table index
    int i = atoi(line+match[m].rm_so);
    if (!(0 <= i && i < NUMBER_ATA_SMART_ATTRIBUTES))
      return false;
    if (match[m+=2].rm_so >= 0) // m = 19
      state.ata_attributes[i].id = (unsigned char)val;
    else if (match[++m].rm_so >= 0) // m = 20
      state.ata_attributes[i].val = (unsigned char)val;
    else if (match[++m].rm_so >= 0) // m = 21
      state.ata_attributes[i].worst = (unsigned char)val;
    else if (match[++m].rm_so >= 0) // m = 22
      state.ata_attributes[i].raw = val;
    else if (match[++m].rm_so >= 0) // m = 23
      state.ata_attributes[i].resvd = (unsigned char)val;
    else
      return false;
  }
  else if (match[m+7].rm_so >= 0) // m+7 = 24
    state.nvme_err_log_entries = val;
  else
    return false;
  return true;
}
772
773 // Read a state file.
read_dev_state(const char * path,persistent_dev_state & state)774 static bool read_dev_state(const char * path, persistent_dev_state & state)
775 {
776 stdio_file f(path, "r");
777 if (!f) {
778 if (errno != ENOENT)
779 pout("Cannot read state file \"%s\"\n", path);
780 return false;
781 }
782 #ifdef __CYGWIN__
783 setmode(fileno(f), O_TEXT); // Allow files with \r\n
784 #endif
785
786 persistent_dev_state new_state;
787 int good = 0, bad = 0;
788 char line[256];
789 while (fgets(line, sizeof(line), f)) {
790 const char * s = line + strspn(line, " \t");
791 if (!*s || *s == '#')
792 continue;
793 if (!parse_dev_state_line(line, new_state))
794 bad++;
795 else
796 good++;
797 }
798
799 if (bad) {
800 if (!good) {
801 pout("%s: format error\n", path);
802 return false;
803 }
804 pout("%s: %d invalid line(s) ignored\n", path, bad);
805 }
806
807 // This sets the values missing in the file to 0.
808 state = new_state;
809 return true;
810 }
811
// Write the line "NAME = VALUE" to the state file 'f'.
// Nothing is written if 'val' is 0.
static void write_dev_state_line(FILE * f, const char * name, uint64_t val)
{
  if (val == 0)
    return;
  fprintf(f, "%s = %" PRIu64 "\n", name, val);
}
817
// Write the line "NAME1.ID.NAME2 = VALUE" to the state file 'f'.
// Nothing is written if 'val' is 0.
static void write_dev_state_line(FILE * f, const char * name1, int id, const char * name2, uint64_t val)
{
  if (val == 0)
    return;
  fprintf(f, "%s.%d.%s = %" PRIu64 "\n", name1, id, name2, val);
}
823
824 // Write a state file
write_dev_state(const char * path,const persistent_dev_state & state)825 static bool write_dev_state(const char * path, const persistent_dev_state & state)
826 {
827 // Rename old "file" to "file~"
828 std::string pathbak = path; pathbak += '~';
829 unlink(pathbak.c_str());
830 rename(path, pathbak.c_str());
831
832 stdio_file f(path, "w");
833 if (!f) {
834 pout("Cannot create state file \"%s\"\n", path);
835 return false;
836 }
837
838 fprintf(f, "# smartd state file\n");
839 write_dev_state_line(f, "temperature-min", state.tempmin);
840 write_dev_state_line(f, "temperature-max", state.tempmax);
841 write_dev_state_line(f, "self-test-errors", state.selflogcount);
842 write_dev_state_line(f, "self-test-last-err-hour", state.selfloghour);
843 write_dev_state_line(f, "scheduled-test-next-check", state.scheduled_test_next_check);
844 write_dev_state_line(f, "selective-test-last-start", state.selective_test_last_start);
845 write_dev_state_line(f, "selective-test-last-end", state.selective_test_last_end);
846
847 int i;
848 for (i = 0; i < SMARTD_NMAIL; i++) {
849 if (i == MAILTYPE_TEST) // Don't suppress test mails
850 continue;
851 const mailinfo & mi = state.maillog[i];
852 if (!mi.logged)
853 continue;
854 write_dev_state_line(f, "mail", i, "count", mi.logged);
855 write_dev_state_line(f, "mail", i, "first-sent-time", mi.firstsent);
856 write_dev_state_line(f, "mail", i, "last-sent-time", mi.lastsent);
857 }
858
859 // ATA ONLY
860 write_dev_state_line(f, "ata-error-count", state.ataerrorcount);
861
862 for (i = 0; i < NUMBER_ATA_SMART_ATTRIBUTES; i++) {
863 const persistent_dev_state::ata_attribute & pa = state.ata_attributes[i];
864 if (!pa.id)
865 continue;
866 write_dev_state_line(f, "ata-smart-attribute", i, "id", pa.id);
867 write_dev_state_line(f, "ata-smart-attribute", i, "val", pa.val);
868 write_dev_state_line(f, "ata-smart-attribute", i, "worst", pa.worst);
869 write_dev_state_line(f, "ata-smart-attribute", i, "raw", pa.raw);
870 write_dev_state_line(f, "ata-smart-attribute", i, "resvd", pa.resvd);
871 }
872
873 // NVMe only
874 write_dev_state_line(f, "nvme-err-log-entries", state.nvme_err_log_entries);
875
876 return true;
877 }
878
879 // Write to the attrlog file
write_dev_attrlog(const char * path,const dev_state & state)880 static bool write_dev_attrlog(const char * path, const dev_state & state)
881 {
882 stdio_file f(path, "a");
883 if (!f) {
884 pout("Cannot create attribute log file \"%s\"\n", path);
885 return false;
886 }
887
888
889 time_t now = time(0);
890 struct tm tmbuf, * tms = time_to_tm_local(&tmbuf, now);
891 fprintf(f, "%d-%02d-%02d %02d:%02d:%02d;",
892 1900+tms->tm_year, 1+tms->tm_mon, tms->tm_mday,
893 tms->tm_hour, tms->tm_min, tms->tm_sec);
894 // ATA ONLY
895 for (int i = 0; i < NUMBER_ATA_SMART_ATTRIBUTES; i++) {
896 const persistent_dev_state::ata_attribute & pa = state.ata_attributes[i];
897 if (!pa.id)
898 continue;
899 fprintf(f, "\t%d;%d;%" PRIu64 ";", pa.id, pa.val, pa.raw);
900 }
901 // SCSI ONLY
902 const struct scsiErrorCounter * ecp;
903 const char * pageNames[3] = {"read", "write", "verify"};
904 for (int k = 0; k < 3; ++k) {
905 if ( !state.scsi_error_counters[k].found ) continue;
906 ecp = &state.scsi_error_counters[k].errCounter;
907 fprintf(f, "\t%s-corr-by-ecc-fast;%" PRIu64 ";"
908 "\t%s-corr-by-ecc-delayed;%" PRIu64 ";"
909 "\t%s-corr-by-retry;%" PRIu64 ";"
910 "\t%s-total-err-corrected;%" PRIu64 ";"
911 "\t%s-corr-algorithm-invocations;%" PRIu64 ";"
912 "\t%s-gb-processed;%.3f;"
913 "\t%s-total-unc-errors;%" PRIu64 ";",
914 pageNames[k], ecp->counter[0],
915 pageNames[k], ecp->counter[1],
916 pageNames[k], ecp->counter[2],
917 pageNames[k], ecp->counter[3],
918 pageNames[k], ecp->counter[4],
919 pageNames[k], (ecp->counter[5] / 1000000000.0),
920 pageNames[k], ecp->counter[6]);
921 }
922 if(state.scsi_nonmedium_error.found && state.scsi_nonmedium_error.nme.gotPC0) {
923 fprintf(f, "\tnon-medium-errors;%" PRIu64 ";", state.scsi_nonmedium_error.nme.counterPC0);
924 }
925 // write SCSI current temperature if it is monitored
926 if (state.temperature)
927 fprintf(f, "\ttemperature;%d;", state.temperature);
928 // end of line
929 fprintf(f, "\n");
930 return true;
931 }
932
933 // Write all state files. If write_always is false, don't write
934 // unless must_write is set.
write_all_dev_states(const dev_config_vector & configs,dev_state_vector & states,bool write_always=true)935 static void write_all_dev_states(const dev_config_vector & configs,
936 dev_state_vector & states,
937 bool write_always = true)
938 {
939 for (unsigned i = 0; i < states.size(); i++) {
940 const dev_config & cfg = configs.at(i);
941 if (cfg.state_file.empty())
942 continue;
943 dev_state & state = states[i];
944 if (!write_always && !state.must_write)
945 continue;
946 if (!write_dev_state(cfg.state_file.c_str(), state))
947 continue;
948 state.must_write = false;
949 if (write_always || debugmode)
950 PrintOut(LOG_INFO, "Device: %s, state written to %s\n",
951 cfg.name.c_str(), cfg.state_file.c_str());
952 }
953 }
954
955 // Write to all attrlog files
write_all_dev_attrlogs(const dev_config_vector & configs,dev_state_vector & states)956 static void write_all_dev_attrlogs(const dev_config_vector & configs,
957 dev_state_vector & states)
958 {
959 for (unsigned i = 0; i < states.size(); i++) {
960 const dev_config & cfg = configs.at(i);
961 if (cfg.attrlog_file.empty())
962 continue;
963 dev_state & state = states[i];
964 if (state.attrlog_dirty) {
965 write_dev_attrlog(cfg.attrlog_file.c_str(), state);
966 state.attrlog_dirty = false;
967 }
968 }
969 }
970
971 extern "C" { // signal handlers require C-linkage
972
973 // Note if we catch a SIGUSR1
USR1handler(int sig)974 static void USR1handler(int sig)
975 {
976 if (SIGUSR1==sig)
977 caughtsigUSR1=1;
978 return;
979 }
980
981 #ifdef _WIN32
982 // Note if we catch a SIGUSR2
USR2handler(int sig)983 static void USR2handler(int sig)
984 {
985 if (SIGUSR2==sig)
986 caughtsigUSR2=1;
987 return;
988 }
989 #endif
990
991 // Note if we catch a HUP (or INT in debug mode)
HUPhandler(int sig)992 static void HUPhandler(int sig)
993 {
994 if (sig==SIGHUP)
995 caughtsigHUP=1;
996 else
997 caughtsigHUP=2;
998 return;
999 }
1000
1001 // signal handler for TERM, QUIT, and INT (if not in debug mode)
sighandler(int sig)1002 static void sighandler(int sig)
1003 {
1004 if (!caughtsigEXIT)
1005 caughtsigEXIT=sig;
1006 return;
1007 }
1008
1009 } // extern "C"
1010
1011 #ifdef HAVE_LIBCAP_NG
1012 // capabilities(7) support
1013
1014 static bool capabilities_enabled = false;
1015
capabilities_drop_now()1016 static void capabilities_drop_now()
1017 {
1018 if (!capabilities_enabled)
1019 return;
1020 capng_clear(CAPNG_SELECT_BOTH);
1021 capng_updatev(CAPNG_ADD, (capng_type_t)(CAPNG_EFFECTIVE|CAPNG_PERMITTED),
1022 CAP_SYS_ADMIN, CAP_MKNOD, CAP_SYS_RAWIO, -1);
1023 capng_apply(CAPNG_SELECT_BOTH);
1024 }
1025
capabilities_check_config(dev_config_vector & configs)1026 static void capabilities_check_config(dev_config_vector & configs)
1027 {
1028 if (!capabilities_enabled)
1029 return;
1030 for (unsigned i = 0; i < configs.size(); i++) {
1031 dev_config & cfg = configs[i];
1032 if (!cfg.emailaddress.empty() || !cfg.emailcmdline.empty()) {
1033 PrintOut(LOG_INFO, "Device: %s, --capabilites is set, mail will be suppressed.\n",
1034 cfg.name.c_str());
1035 cfg.emailaddress.clear(); cfg.emailcmdline.clear();
1036 }
1037 }
1038 }
1039
1040 #else // HAVE_LIBCAP_NG
1041 // No capabilities(7) support
1042
// Without capabilities(7) support there is nothing to drop
static inline void capabilities_drop_now()
{
}
capabilities_check_config(dev_config_vector &)1044 static inline void capabilities_check_config(dev_config_vector &) { }
1045
1046 #endif // HAVE_LIBCAP_NG
1047
1048 // a replacement for setenv() which is not available on all platforms.
1049 // Note that the string passed to putenv must not be freed or made
1050 // invalid, since a pointer to it is kept by putenv(). This means that
1051 // it must either be a static buffer or allocated off the heap. The
1052 // string can be freed if the environment variable is redefined via
1053 // another call to putenv(). There is no portable way to unset a variable
1054 // with putenv(). So we manage the buffer in a static object.
1055 // Using setenv() if available is not considered because some
1056 // implementations may produce memory leaks.
1057
// Owns the "NAME=value" string most recently passed to putenv().
// The string must stay valid while the environment refers to it, so there
// is deliberately no destructor which would free it.
class env_buffer
{
public:
  env_buffer()
    : m_buf((char *)0) { }

  // Export NAME=value to the environment.
  // Throws std::runtime_error if putenv() fails; the previous setting
  // then remains in effect.
  void set(const char * name, const char * value);

private:
  char * m_buf; // string passed to the last successful putenv(), or 0

  // Not copyable
  env_buffer(const env_buffer &);
  void operator=(const env_buffer &);
};

void env_buffer::set(const char * name, const char * value)
{
  // "NAME" + '=' + "value" + '\0'
  const size_t size = strlen(name) + 1 + strlen(value) + 1;
  char * newbuf = new char[size];
  snprintf(newbuf, size, "%s=%s", name, value);

  if (putenv(newbuf)) {
    // Environment does not reference newbuf, release it before reporting
    delete [] newbuf;
    throw std::runtime_error("putenv() failed");
  }

  // The old string is no longer referenced by the environment.
  // This assumes that the same NAME is passed on each call
  delete [] m_buf;
  m_buf = newbuf;
}
1086
1087 #define EBUFLEN 1024
1088
1089 static void MailWarning(const dev_config & cfg, dev_state & state, int which, const char *fmt, ...)
1090 __attribute_format_printf(4, 5);
1091
1092 // If either address or executable path is non-null then send and log
1093 // a warning email, or execute executable
MailWarning(const dev_config & cfg,dev_state & state,int which,const char * fmt,...)1094 static void MailWarning(const dev_config & cfg, dev_state & state, int which, const char *fmt, ...)
1095 {
1096 static const char * const whichfail[] = {
1097 "EmailTest", // 0
1098 "Health", // 1
1099 "Usage", // 2
1100 "SelfTest", // 3
1101 "ErrorCount", // 4
1102 "FailedHealthCheck", // 5
1103 "FailedReadSmartData", // 6
1104 "FailedReadSmartErrorLog", // 7
1105 "FailedReadSmartSelfTestLog", // 8
1106 "FailedOpenDevice", // 9
1107 "CurrentPendingSector", // 10
1108 "OfflineUncorrectableSector", // 11
1109 "Temperature" // 12
1110 };
1111
1112 // See if user wants us to send mail
1113 if (cfg.emailaddress.empty() && cfg.emailcmdline.empty())
1114 return;
1115
1116 std::string address = cfg.emailaddress;
1117 const char * executable = cfg.emailcmdline.c_str();
1118
1119 // which type of mail are we sending?
1120 mailinfo * mail=(state.maillog)+which;
1121
1122 // checks for sanity
1123 if (cfg.emailfreq<1 || cfg.emailfreq>3) {
1124 PrintOut(LOG_CRIT,"internal error in MailWarning(): cfg.mailwarn->emailfreq=%d\n",cfg.emailfreq);
1125 return;
1126 }
1127 if (which<0 || which>=SMARTD_NMAIL || sizeof(whichfail)!=SMARTD_NMAIL*sizeof(char *)) {
1128 PrintOut(LOG_CRIT,"Contact " PACKAGE_BUGREPORT "; internal error in MailWarning(): which=%d, size=%d\n",
1129 which, (int)sizeof(whichfail));
1130 return;
1131 }
1132
1133 // Return if a single warning mail has been sent.
1134 if ((cfg.emailfreq==1) && mail->logged)
1135 return;
1136
1137 // Return if this is an email test and one has already been sent.
1138 if (which == 0 && mail->logged)
1139 return;
1140
1141 // To decide if to send mail, we need to know what time it is.
1142 time_t epoch = time(0);
1143
1144 // Return if less than one day has gone by
1145 const int day = 24*3600;
1146 if (cfg.emailfreq==2 && mail->logged && epoch<(mail->lastsent+day))
1147 return;
1148
1149 // Return if less than 2^(logged-1) days have gone by
1150 if (cfg.emailfreq==3 && mail->logged) {
1151 int days = 0x01 << (mail->logged - 1);
1152 days*=day;
1153 if (epoch<(mail->lastsent+days))
1154 return;
1155 }
1156
1157 // record the time of this mail message, and the first mail message
1158 if (!mail->logged)
1159 mail->firstsent=epoch;
1160 mail->lastsent=epoch;
1161
1162 // print warning string into message
1163 // Note: Message length may reach ~300 characters as device names may be
1164 // very long on certain platforms (macOS ~230 characters).
1165 // Message length must not exceed email line length limit, see RFC 5322:
1166 // "... MUST be no more than 998 characters, ... excluding the CRLF."
1167 char message[512];
1168 va_list ap;
1169 va_start(ap, fmt);
1170 vsnprintf(message, sizeof(message), fmt, ap);
1171 va_end(ap);
1172
1173 // replace commas by spaces to separate recipients
1174 std::replace(address.begin(), address.end(), ',', ' ');
1175
1176 // Export information in environment variables that will be useful
1177 // for user scripts
1178 static env_buffer env[12];
1179 env[0].set("SMARTD_MAILER", executable);
1180 env[1].set("SMARTD_MESSAGE", message);
1181 char dates[DATEANDEPOCHLEN];
1182 snprintf(dates, sizeof(dates), "%d", mail->logged);
1183 env[2].set("SMARTD_PREVCNT", dates);
1184 dateandtimezoneepoch(dates, mail->firstsent);
1185 env[3].set("SMARTD_TFIRST", dates);
1186 snprintf(dates, DATEANDEPOCHLEN,"%d", (int)mail->firstsent);
1187 env[4].set("SMARTD_TFIRSTEPOCH", dates);
1188 env[5].set("SMARTD_FAILTYPE", whichfail[which]);
1189 env[6].set("SMARTD_ADDRESS", address.c_str());
1190 env[7].set("SMARTD_DEVICESTRING", cfg.name.c_str());
1191
1192 // Allow 'smartctl ... -d $SMARTD_DEVICETYPE $SMARTD_DEVICE'
1193 env[8].set("SMARTD_DEVICETYPE",
1194 (!cfg.dev_type.empty() ? cfg.dev_type.c_str() : "auto"));
1195 env[9].set("SMARTD_DEVICE", cfg.dev_name.c_str());
1196
1197 env[10].set("SMARTD_DEVICEINFO", cfg.dev_idinfo.c_str());
1198 dates[0] = 0;
1199 if (which) switch (cfg.emailfreq) {
1200 case 2: dates[0] = '1'; dates[1] = 0; break;
1201 case 3: snprintf(dates, sizeof(dates), "%d", (0x01)<<mail->logged);
1202 }
1203 env[11].set("SMARTD_NEXTDAYS", dates);
1204
1205 // now construct a command to send this as EMAIL
1206 if (!*executable)
1207 executable = "<mail>";
1208 const char * newadd = (!address.empty()? address.c_str() : "<nomailer>");
1209 const char * newwarn = (which? "Warning via" : "Test of");
1210
1211 char command[256];
1212 #ifdef _WIN32
1213 // Path may contain spaces
1214 snprintf(command, sizeof(command), "\"%s\" 2>&1", warning_script.c_str());
1215 #else
1216 snprintf(command, sizeof(command), "%s 2>&1", warning_script.c_str());
1217 #endif
1218
1219 // tell SYSLOG what we are about to do...
1220 PrintOut(LOG_INFO,"%s %s to %s ...\n",
1221 which?"Sending warning via":"Executing test of", executable, newadd);
1222
1223 // issue the command to send mail or to run the user's executable
1224 errno=0;
1225 FILE * pfp;
1226 if (!(pfp=popen(command, "r")))
1227 // failed to popen() mail process
1228 PrintOut(LOG_CRIT,"%s %s to %s: failed (fork or pipe failed, or no memory) %s\n",
1229 newwarn, executable, newadd, errno?strerror(errno):"");
1230 else {
1231 // pipe succeeded!
1232 int len, status;
1233 char buffer[EBUFLEN];
1234
1235 // if unexpected output on stdout/stderr, null terminate, print, and flush
1236 if ((len=fread(buffer, 1, EBUFLEN, pfp))) {
1237 int count=0;
1238 int newlen = len<EBUFLEN ? len : EBUFLEN-1;
1239 buffer[newlen]='\0';
1240 PrintOut(LOG_CRIT,"%s %s to %s produced unexpected output (%s%d bytes) to STDOUT/STDERR: \n%s\n",
1241 newwarn, executable, newadd, len!=newlen?"here truncated to ":"", newlen, buffer);
1242
1243 // flush pipe if needed
1244 while (fread(buffer, 1, EBUFLEN, pfp) && count<EBUFLEN)
1245 count++;
1246
1247 // tell user that pipe was flushed, or that something is really wrong
1248 if (count && count<EBUFLEN)
1249 PrintOut(LOG_CRIT,"%s %s to %s: flushed remaining STDOUT/STDERR\n",
1250 newwarn, executable, newadd);
1251 else if (count)
1252 PrintOut(LOG_CRIT,"%s %s to %s: more than 1 MB STDOUT/STDERR flushed, breaking pipe\n",
1253 newwarn, executable, newadd);
1254 }
1255
1256 // if something went wrong with mail process, print warning
1257 errno=0;
1258 if (-1==(status=pclose(pfp)))
1259 PrintOut(LOG_CRIT,"%s %s to %s: pclose(3) failed %s\n", newwarn, executable, newadd,
1260 errno?strerror(errno):"");
1261 else {
1262 // mail process apparently succeeded. Check and report exit status
1263 if (WIFEXITED(status)) {
1264 // exited 'normally' (but perhaps with nonzero status)
1265 int status8 = WEXITSTATUS(status);
1266 if (status8>128)
1267 PrintOut(LOG_CRIT,"%s %s to %s: failed (32-bit/8-bit exit status: %d/%d) perhaps caught signal %d [%s]\n",
1268 newwarn, executable, newadd, status, status8, status8-128, strsignal(status8-128));
1269 else if (status8)
1270 PrintOut(LOG_CRIT,"%s %s to %s: failed (32-bit/8-bit exit status: %d/%d)\n",
1271 newwarn, executable, newadd, status, status8);
1272 else
1273 PrintOut(LOG_INFO,"%s %s to %s: successful\n", newwarn, executable, newadd);
1274 }
1275
1276 if (WIFSIGNALED(status))
1277 PrintOut(LOG_INFO,"%s %s to %s: exited because of uncaught signal %d [%s]\n",
1278 newwarn, executable, newadd, WTERMSIG(status), strsignal(WTERMSIG(status)));
1279
1280 // this branch is probably not possible. If subprocess is
1281 // stopped then pclose() should not return.
1282 if (WIFSTOPPED(status))
1283 PrintOut(LOG_CRIT,"%s %s to %s: process STOPPED because it caught signal %d [%s]\n",
1284 newwarn, executable, newadd, WSTOPSIG(status), strsignal(WSTOPSIG(status)));
1285
1286 }
1287 }
1288
1289 // increment mail sent counter
1290 mail->logged++;
1291 }
1292
1293 static void reset_warning_mail(const dev_config & cfg, dev_state & state, int which, const char *fmt, ...)
1294 __attribute_format_printf(4, 5);
1295
reset_warning_mail(const dev_config & cfg,dev_state & state,int which,const char * fmt,...)1296 static void reset_warning_mail(const dev_config & cfg, dev_state & state, int which, const char *fmt, ...)
1297 {
1298 if (!(0 <= which && which < SMARTD_NMAIL))
1299 return;
1300
1301 // Return if no mail sent yet
1302 mailinfo & mi = state.maillog[which];
1303 if (!mi.logged)
1304 return;
1305
1306 // Format & print message
1307 char msg[256];
1308 va_list ap;
1309 va_start(ap, fmt);
1310 vsnprintf(msg, sizeof(msg), fmt, ap);
1311 va_end(ap);
1312
1313 PrintOut(LOG_INFO, "Device: %s, %s, warning condition reset after %d email%s\n", cfg.name.c_str(),
1314 msg, mi.logged, (mi.logged==1 ? "" : "s"));
1315
1316 // Clear mail counter and timestamps
1317 mi = mailinfo();
1318 state.must_write = true;
1319 }
1320
1321 #ifndef _WIN32
1322
1323 // Output multiple lines via separate syslog(3) calls.
1324 __attribute_format_printf(2, 0)
vsyslog_lines(int priority,const char * fmt,va_list ap)1325 static void vsyslog_lines(int priority, const char * fmt, va_list ap)
1326 {
1327 char buf[512+EBUFLEN]; // enough space for exec cmd output in MailWarning()
1328 vsnprintf(buf, sizeof(buf), fmt, ap);
1329
1330 for (char * p = buf, * q; p && *p; p = q) {
1331 if ((q = strchr(p, '\n')))
1332 *q++ = 0;
1333 if (*p)
1334 syslog(priority, "%s\n", p);
1335 }
1336 }
1337
1338 #else // _WIN32
1339 // os_win32/syslog_win32.cpp supports multiple lines.
1340 #define vsyslog_lines vsyslog
1341 #endif // _WIN32
1342
1343 // Printing function for watching ataprint commands, or losing them
1344 // [From GLIBC Manual: Since the prototype doesn't specify types for
1345 // optional arguments, in a call to a variadic function the default
1346 // argument promotions are performed on the optional argument
1347 // values. This means the objects of type char or short int (whether
1348 // signed or not) are promoted to either int or unsigned int, as
1349 // appropriate.]
pout(const char * fmt,...)1350 void pout(const char *fmt, ...){
1351 va_list ap;
1352
1353 // get the correct time in syslog()
1354 FixGlibcTimeZoneBug();
1355 // initialize variable argument list
1356 va_start(ap,fmt);
1357 // in debugmode==1 mode we will print the output from the ataprint.o functions!
1358 if (debugmode && debugmode != 2) {
1359 FILE * f = stdout;
1360 #ifdef _WIN32
1361 if (facility == LOG_LOCAL1) // logging to stdout
1362 f = stderr;
1363 #endif
1364 vfprintf(f, fmt, ap);
1365 fflush(f);
1366 }
1367 // in debugmode==2 mode we print output from knowndrives.o functions
1368 else if (debugmode==2 || ata_debugmode || scsi_debugmode) {
1369 openlog("smartd", LOG_PID, facility);
1370 vsyslog_lines(LOG_INFO, fmt, ap);
1371 closelog();
1372 }
1373 va_end(ap);
1374 return;
1375 }
1376
1377 // This function prints either to stdout or to the syslog as needed.
PrintOut(int priority,const char * fmt,...)1378 static void PrintOut(int priority, const char *fmt, ...){
1379 va_list ap;
1380
1381 // get the correct time in syslog()
1382 FixGlibcTimeZoneBug();
1383 // initialize variable argument list
1384 va_start(ap,fmt);
1385 if (debugmode) {
1386 FILE * f = stdout;
1387 #ifdef _WIN32
1388 if (facility == LOG_LOCAL1) // logging to stdout
1389 f = stderr;
1390 #endif
1391 vfprintf(f, fmt, ap);
1392 fflush(f);
1393 }
1394 else {
1395 openlog("smartd", LOG_PID, facility);
1396 vsyslog_lines(priority, fmt, ap);
1397 closelog();
1398 }
1399 va_end(ap);
1400 return;
1401 }
1402
1403 // Used to warn users about invalid checksums. Called from atacmds.cpp.
checksumwarning(const char * string)1404 void checksumwarning(const char * string)
1405 {
1406 pout("Warning! %s error: invalid SMART checksum.\n", string);
1407 }
1408
1409 #ifndef _WIN32
1410
1411 // Wait for the pid file to show up, this makes sure a calling program knows
1412 // that the daemon is really up and running and has a pid to kill it
WaitForPidFile()1413 static bool WaitForPidFile()
1414 {
1415 int waited, max_wait = 10;
1416 struct stat stat_buf;
1417
1418 if (pid_file.empty() || debugmode)
1419 return true;
1420
1421 for(waited = 0; waited < max_wait; ++waited) {
1422 if (!stat(pid_file.c_str(), &stat_buf)) {
1423 return true;
1424 } else
1425 sleep(1);
1426 }
1427 return false;
1428 }
1429
1430 #endif // _WIN32
1431
1432 // Forks new process if needed, closes ALL file descriptors,
1433 // redirects stdin, stdout, and stderr. Not quite daemon().
1434 // See https://www.linuxjournal.com/article/2335
1435 // for a good description of why we do things this way.
daemon_init()1436 static int daemon_init()
1437 {
1438 #ifndef _WIN32
1439
1440 // flush all buffered streams. Else we might get two copies of open
1441 // streams since both parent and child get copies of the buffers.
1442 fflush(NULL);
1443
1444 if (do_fork) {
1445 pid_t pid;
1446 if ((pid=fork()) < 0) {
1447 // unable to fork!
1448 PrintOut(LOG_CRIT,"smartd unable to fork daemon process!\n");
1449 return EXIT_STARTUP;
1450 }
1451 if (pid) {
1452 // we are the parent process, wait for pid file, then exit cleanly
1453 if(!WaitForPidFile()) {
1454 PrintOut(LOG_CRIT,"PID file %s didn't show up!\n", pid_file.c_str());
1455 return EXIT_STARTUP;
1456 }
1457 return 0;
1458 }
1459
1460 // from here on, we are the child process.
1461 setsid();
1462
1463 // Fork one more time to avoid any possibility of having terminals
1464 if ((pid=fork()) < 0) {
1465 // unable to fork!
1466 PrintOut(LOG_CRIT,"smartd unable to fork daemon process!\n");
1467 return EXIT_STARTUP;
1468 }
1469 if (pid)
1470 // we are the parent process -- exit cleanly
1471 return 0;
1472
1473 // Now we are the child's child...
1474 }
1475
1476 // close any open file descriptors
1477 for (int i = getdtablesize(); --i >= 0; )
1478 close(i);
1479
1480 // redirect any IO attempts to /dev/null and change to root directory
1481 int fd = open("/dev/null", O_RDWR);
1482 if (!(fd == 0 && dup(fd) == 1 && dup(fd) == 2 && !chdir("/"))) {
1483 PrintOut(LOG_CRIT, "smartd unable to redirect to /dev/null or to chdir to root!\n");
1484 return EXIT_STARTUP;
1485 }
1486 umask(0022);
1487
1488 if (do_fork)
1489 PrintOut(LOG_INFO, "smartd has fork()ed into background mode. New PID=%d.\n", (int)getpid());
1490
1491 #else // _WIN32
1492
1493 // No fork() on native Win32
1494 // Detach this process from console
1495 fflush(NULL);
1496 if (daemon_detach("smartd")) {
1497 PrintOut(LOG_CRIT,"smartd unable to detach from console!\n");
1498 return EXIT_STARTUP;
1499 }
1500 // stdin/out/err now closed if not redirected
1501
1502 #endif // _WIN32
1503
1504 // No error, continue in main_worker()
1505 return -1;
1506 }
1507
1508 // create a PID file containing the current process id
write_pid_file()1509 static bool write_pid_file()
1510 {
1511 if (!pid_file.empty()) {
1512 pid_t pid = getpid();
1513 mode_t old_umask;
1514 #ifndef __CYGWIN__
1515 old_umask = umask(0077); // rwx------
1516 #else
1517 // Cygwin: smartd service runs on system account, ensure PID file can be read by admins
1518 old_umask = umask(0033); // rwxr--r--
1519 #endif
1520
1521 stdio_file f(pid_file.c_str(), "w");
1522 umask(old_umask);
1523 if (!(f && fprintf(f, "%d\n", (int)pid) > 0 && f.close())) {
1524 PrintOut(LOG_CRIT, "unable to write PID file %s - exiting.\n", pid_file.c_str());
1525 return false;
1526 }
1527 PrintOut(LOG_INFO, "file %s written containing PID %d\n", pid_file.c_str(), (int)pid);
1528 }
1529 return true;
1530 }
1531
1532 // Prints header identifying version of code and home
PrintHead()1533 static void PrintHead()
1534 {
1535 PrintOut(LOG_INFO, "%s\n", format_version_info("smartd").c_str());
1536 }
1537
1538 // prints help info for configuration file Directives
Directives()1539 static void Directives()
1540 {
1541 PrintOut(LOG_INFO,
1542 "Configuration file (%s) Directives (after device name):\n"
1543 " -d TYPE Set the device type: auto, ignore, removable,\n"
1544 " %s\n"
1545 " -T TYPE Set the tolerance to one of: normal, permissive\n"
1546 " -o VAL Enable/disable automatic offline tests (on/off)\n"
1547 " -S VAL Enable/disable attribute autosave (on/off)\n"
1548 " -n MODE No check if: never, sleep[,N][,q], standby[,N][,q], idle[,N][,q]\n"
1549 " -H Monitor SMART Health Status, report if failed\n"
1550 " -s REG Do Self-Test at time(s) given by regular expression REG\n"
1551 " -l TYPE Monitor SMART log or self-test status:\n"
1552 " error, selftest, xerror, offlinests[,ns], selfteststs[,ns]\n"
1553 " -l scterc,R,W Set SCT Error Recovery Control\n"
1554 " -e Change device setting: aam,[N|off], apm,[N|off], dsn,[on|off],\n"
1555 " lookahead,[on|off], security-freeze, standby,[N|off], wcache,[on|off]\n"
1556 " -f Monitor 'Usage' Attributes, report failures\n"
1557 " -m ADD Send email warning to address ADD\n"
1558 " -M TYPE Modify email warning behavior (see man page)\n"
1559 " -p Report changes in 'Prefailure' Attributes\n"
1560 " -u Report changes in 'Usage' Attributes\n"
1561 " -t Equivalent to -p and -u Directives\n"
1562 " -r ID Also report Raw values of Attribute ID with -p, -u or -t\n"
1563 " -R ID Track changes in Attribute ID Raw value with -p, -u or -t\n"
1564 " -i ID Ignore Attribute ID for -f Directive\n"
1565 " -I ID Ignore Attribute ID for -p, -u or -t Directive\n"
1566 " -C ID[+] Monitor [increases of] Current Pending Sectors in Attribute ID\n"
1567 " -U ID[+] Monitor [increases of] Offline Uncorrectable Sectors in Attribute ID\n"
1568 " -W D,I,C Monitor Temperature D)ifference, I)nformal limit, C)ritical limit\n"
1569 " -v N,ST Modifies labeling of Attribute N (see man page) \n"
1570 " -P TYPE Drive-specific presets: use, ignore, show, showall\n"
1571 " -a Default: -H -f -t -l error -l selftest -l selfteststs -C 197 -U 198\n"
1572 " -F TYPE Use firmware bug workaround:\n"
1573 " %s\n"
1574 " # Comment: text after a hash sign is ignored\n"
1575 " \\ Line continuation character\n"
1576 "Attribute ID is a decimal integer 1 <= ID <= 255\n"
1577 "Use ID = 0 to turn off -C and/or -U Directives\n"
1578 "Example: /dev/sda -a\n",
1579 configfile,
1580 smi()->get_valid_dev_types_str().c_str(),
1581 get_valid_firmwarebug_args());
1582 }
1583
/* Returns a pointer to a static string containing a formatted list of the valid
   arguments to the option opt or NULL if opt is not a known option. */
static const char *GetValidArgList(char opt)
{
  // Option letter -> description of its valid arguments
  static const struct {
    char letter;
    const char * args;
  } arg_lists[] = {
    { 'A', "<PATH_PREFIX>" },
    { 's', "<PATH_PREFIX>" },
    { 'B', "[+]<FILE_NAME>" },
    { 'c', "<FILE_NAME>, -" },
    { 'l', "daemon, local0, local1, local2, local3, local4, local5, local6, local7" },
    { 'q', "nodev, errors, nodevstartup, never, onecheck, showtests" },
    { 'r', "ioctl[,N], ataioctl[,N], scsiioctl[,N], nvmeioctl[,N]" },
    { 'p', "<FILE_NAME>" },
    { 'w', "<FILE_NAME>" },
    { 'i', "<INTEGER_SECONDS>" }
  };

  const unsigned count = sizeof(arg_lists) / sizeof(arg_lists[0]);
  for (unsigned n = 0; n < count; n++) {
    if (arg_lists[n].letter == opt)
      return arg_lists[n].args;
  }
  return NULL;
}
1611
1612 /* prints help information for command syntax */
Usage()1613 static void Usage()
1614 {
1615 PrintOut(LOG_INFO,"Usage: smartd [options]\n\n");
1616 PrintOut(LOG_INFO," -A PREFIX, --attributelog=PREFIX\n");
1617 PrintOut(LOG_INFO," Log ATA attribute information to {PREFIX}MODEL-SERIAL.ata.csv\n");
1618 #ifdef SMARTMONTOOLS_ATTRIBUTELOG
1619 PrintOut(LOG_INFO," [default is " SMARTMONTOOLS_ATTRIBUTELOG "MODEL-SERIAL.ata.csv]\n");
1620 #endif
1621 PrintOut(LOG_INFO,"\n");
1622 PrintOut(LOG_INFO," -B [+]FILE, --drivedb=[+]FILE\n");
1623 PrintOut(LOG_INFO," Read and replace [add] drive database from FILE\n");
1624 PrintOut(LOG_INFO," [default is +%s", get_drivedb_path_add());
1625 #ifdef SMARTMONTOOLS_DRIVEDBDIR
1626 PrintOut(LOG_INFO,"\n");
1627 PrintOut(LOG_INFO," and then %s", get_drivedb_path_default());
1628 #endif
1629 PrintOut(LOG_INFO,"]\n\n");
1630 PrintOut(LOG_INFO," -c NAME|-, --configfile=NAME|-\n");
1631 PrintOut(LOG_INFO," Read configuration file NAME or stdin\n");
1632 PrintOut(LOG_INFO," [default is %s]\n\n", configfile);
1633 #ifdef HAVE_LIBCAP_NG
1634 PrintOut(LOG_INFO," -C, --capabilities\n");
1635 PrintOut(LOG_INFO," Drop unneeded Linux process capabilities.\n"
1636 " Warning: Mail notification does not work when used.\n\n");
1637 #endif
1638 PrintOut(LOG_INFO," -d, --debug\n");
1639 PrintOut(LOG_INFO," Start smartd in debug mode\n\n");
1640 PrintOut(LOG_INFO," -D, --showdirectives\n");
1641 PrintOut(LOG_INFO," Print the configuration file Directives and exit\n\n");
1642 PrintOut(LOG_INFO," -h, --help, --usage\n");
1643 PrintOut(LOG_INFO," Display this help and exit\n\n");
1644 PrintOut(LOG_INFO," -i N, --interval=N\n");
1645 PrintOut(LOG_INFO," Set interval between disk checks to N seconds, where N >= 10\n\n");
1646 PrintOut(LOG_INFO," -l local[0-7], --logfacility=local[0-7]\n");
1647 #ifndef _WIN32
1648 PrintOut(LOG_INFO," Use syslog facility local0 - local7 or daemon [default]\n\n");
1649 #else
1650 PrintOut(LOG_INFO," Log to \"./smartd.log\", stdout, stderr [default is event log]\n\n");
1651 #endif
1652 #ifndef _WIN32
1653 PrintOut(LOG_INFO," -n, --no-fork\n");
1654 PrintOut(LOG_INFO," Do not fork into background\n");
1655 #ifdef HAVE_LIBSYSTEMD
1656 PrintOut(LOG_INFO," (systemd 'Type=notify' is assumed if $NOTIFY_SOCKET is set)\n");
1657 #endif // HAVE_LIBSYSTEMD
1658 PrintOut(LOG_INFO,"\n");
1659 #endif // WIN32
1660 PrintOut(LOG_INFO," -p NAME, --pidfile=NAME\n");
1661 PrintOut(LOG_INFO," Write PID file NAME\n\n");
1662 PrintOut(LOG_INFO," -q WHEN, --quit=WHEN\n");
1663 PrintOut(LOG_INFO," Quit on one of: %s\n\n", GetValidArgList('q'));
1664 PrintOut(LOG_INFO," -r, --report=TYPE\n");
1665 PrintOut(LOG_INFO," Report transactions for one of: %s\n\n", GetValidArgList('r'));
1666 PrintOut(LOG_INFO," -s PREFIX, --savestates=PREFIX\n");
1667 PrintOut(LOG_INFO," Save disk states to {PREFIX}MODEL-SERIAL.TYPE.state\n");
1668 #ifdef SMARTMONTOOLS_SAVESTATES
1669 PrintOut(LOG_INFO," [default is " SMARTMONTOOLS_SAVESTATES "MODEL-SERIAL.TYPE.state]\n");
1670 #endif
1671 PrintOut(LOG_INFO,"\n");
1672 PrintOut(LOG_INFO," -w NAME, --warnexec=NAME\n");
1673 PrintOut(LOG_INFO," Run executable NAME on warnings\n");
1674 #ifndef _WIN32
1675 PrintOut(LOG_INFO," [default is " SMARTMONTOOLS_SMARTDSCRIPTDIR "/smartd_warning.sh]\n\n");
1676 #else
1677 PrintOut(LOG_INFO," [default is %s/smartd_warning.cmd]\n\n", get_exe_dir().c_str());
1678 #endif
1679 #ifdef _WIN32
1680 PrintOut(LOG_INFO," --service\n");
1681 PrintOut(LOG_INFO," Running as windows service (see man page), install with:\n");
1682 PrintOut(LOG_INFO," smartd install [options]\n");
1683 PrintOut(LOG_INFO," Remove service with:\n");
1684 PrintOut(LOG_INFO," smartd remove\n\n");
1685 #endif // _WIN32
1686 PrintOut(LOG_INFO," -V, --version, --license, --copyright\n");
1687 PrintOut(LOG_INFO," Print License, Copyright, and version information\n");
1688 }
1689
CloseDevice(smart_device * device,const char * name)1690 static int CloseDevice(smart_device * device, const char * name)
1691 {
1692 if (!device->close()){
1693 PrintOut(LOG_INFO,"Device: %s, %s, close() failed\n", name, device->get_errmsg());
1694 return 1;
1695 }
1696 // device successfully closed
1697 return 0;
1698 }
1699
// Return true if a char is not allowed in a state file name.
// Only the ASCII digits and letters 0-9, A-Z and a-z are allowed.
static bool not_allowed_in_filename(char c)
{
  if ('0' <= c && c <= '9')
    return false;
  if ('A' <= c && c <= 'Z')
    return false;
  if ('a' <= c && c <= 'z')
    return false;
  return true;
}
1707
1708 // Read error count from Summary or Extended Comprehensive SMART error log
1709 // Return -1 on error
read_ata_error_count(ata_device * device,const char * name,firmwarebug_defs firmwarebugs,bool extended)1710 static int read_ata_error_count(ata_device * device, const char * name,
1711 firmwarebug_defs firmwarebugs, bool extended)
1712 {
1713 if (!extended) {
1714 ata_smart_errorlog log;
1715 if (ataReadErrorLog(device, &log, firmwarebugs)){
1716 PrintOut(LOG_INFO,"Device: %s, Read Summary SMART Error Log failed\n",name);
1717 return -1;
1718 }
1719 return (log.error_log_pointer ? log.ata_error_count : 0);
1720 }
1721 else {
1722 ata_smart_exterrlog logx;
1723 if (!ataReadExtErrorLog(device, &logx, 0, 1 /*first sector only*/, firmwarebugs)) {
1724 PrintOut(LOG_INFO,"Device: %s, Read Extended Comprehensive SMART Error Log failed\n",name);
1725 return -1;
1726 }
1727 // Some disks use the reserved byte as index, see ataprint.cpp.
1728 return (logx.error_log_index || logx.reserved1 ? logx.device_error_count : 0);
1729 }
1730 }
1731
1732 // returns <0 if problem. Otherwise, bottom 8 bits are the self test
1733 // error count, and top bits are the power-on hours of the last error.
SelfTestErrorCount(ata_device * device,const char * name,firmwarebug_defs firmwarebugs)1734 static int SelfTestErrorCount(ata_device * device, const char * name,
1735 firmwarebug_defs firmwarebugs)
1736 {
1737 struct ata_smart_selftestlog log;
1738
1739 if (ataReadSelfTestLog(device, &log, firmwarebugs)){
1740 PrintOut(LOG_INFO,"Device: %s, Read SMART Self Test Log Failed\n",name);
1741 return -1;
1742 }
1743
1744 if (!log.mostrecenttest)
1745 // No tests logged
1746 return 0;
1747
1748 // Count failed self-tests
1749 int errcnt = 0, hours = 0;
1750 for (int i = 20; i >= 0; i--) {
1751 int j = (i + log.mostrecenttest) % 21;
1752 const ata_smart_selftestlog_struct & entry = log.selftest_struct[j];
1753 if (!nonempty(&entry, sizeof(entry)))
1754 continue;
1755
1756 int status = entry.selfteststatus >> 4;
1757 if (status == 0x0 && (entry.selftestnumber & 0x7f) == 0x02)
1758 // First successful extended self-test, stop count
1759 break;
1760
1761 if (0x3 <= status && status <= 0x8) {
1762 // Self-test showed an error
1763 errcnt++;
1764 // Keep track of time of most recent error
1765 if (!hours)
1766 hours = entry.timestamp;
1767 }
1768 }
1769
1770 return ((hours << 8) | errcnt);
1771 }
1772
// Extract the parts of the SelfTestErrorCount() return value:
// bits 0-7 are the error count, bits 8-23 the power-on hours.
// The argument is parenthesized so that expressions may be passed.
#define SELFTEST_ERRORCOUNT(x) ((x) & 0xff)
#define SELFTEST_ERRORHOURS(x) (((x) >> 8) & 0xffff)
1775
// Check offline data collection status.
// Returns true if the status, ignoring bit 7, is 0x03.
static inline bool is_offl_coll_in_progress(unsigned char status)
{
  const int code = status & 0x7f;
  return (code == 0x03);
}
1781
// Check self-test execution status.
// Returns true if the upper four bits of the status are 0xf.
static inline bool is_self_test_in_progress(unsigned char status)
{
  const int code = status >> 4;
  return (code == 0xf);
}
1787
1788 // Log offline data collection status
log_offline_data_coll_status(const char * name,unsigned char status)1789 static void log_offline_data_coll_status(const char * name, unsigned char status)
1790 {
1791 const char * msg;
1792 switch (status & 0x7f) {
1793 case 0x00: msg = "was never started"; break;
1794 case 0x02: msg = "was completed without error"; break;
1795 case 0x03: msg = "is in progress"; break;
1796 case 0x04: msg = "was suspended by an interrupting command from host"; break;
1797 case 0x05: msg = "was aborted by an interrupting command from host"; break;
1798 case 0x06: msg = "was aborted by the device with a fatal error"; break;
1799 default: msg = 0;
1800 }
1801
1802 if (msg)
1803 PrintOut(((status & 0x7f) == 0x06 ? LOG_CRIT : LOG_INFO),
1804 "Device: %s, offline data collection %s%s\n", name, msg,
1805 ((status & 0x80) ? " (auto:on)" : ""));
1806 else
1807 PrintOut(LOG_INFO, "Device: %s, unknown offline data collection status 0x%02x\n",
1808 name, status);
1809 }
1810
1811 // Log self-test execution status
log_self_test_exec_status(const char * name,unsigned char status)1812 static void log_self_test_exec_status(const char * name, unsigned char status)
1813 {
1814 const char * msg;
1815 switch (status >> 4) {
1816 case 0x0: msg = "completed without error"; break;
1817 case 0x1: msg = "was aborted by the host"; break;
1818 case 0x2: msg = "was interrupted by the host with a reset"; break;
1819 case 0x3: msg = "could not complete due to a fatal or unknown error"; break;
1820 case 0x4: msg = "completed with error (unknown test element)"; break;
1821 case 0x5: msg = "completed with error (electrical test element)"; break;
1822 case 0x6: msg = "completed with error (servo/seek test element)"; break;
1823 case 0x7: msg = "completed with error (read test element)"; break;
1824 case 0x8: msg = "completed with error (handling damage?)"; break;
1825 default: msg = 0;
1826 }
1827
1828 if (msg)
1829 PrintOut(((status >> 4) >= 0x4 ? LOG_CRIT : LOG_INFO),
1830 "Device: %s, previous self-test %s\n", name, msg);
1831 else if ((status >> 4) == 0xf)
1832 PrintOut(LOG_INFO, "Device: %s, self-test in progress, %u0%% remaining\n",
1833 name, status & 0x0f);
1834 else
1835 PrintOut(LOG_INFO, "Device: %s, unknown self-test status 0x%02x\n",
1836 name, status);
1837 }
1838
1839 // Check pending sector count id (-C, -U directives).
check_pending_id(const dev_config & cfg,const dev_state & state,unsigned char id,const char * msg)1840 static bool check_pending_id(const dev_config & cfg, const dev_state & state,
1841 unsigned char id, const char * msg)
1842 {
1843 // Check attribute index
1844 int i = ata_find_attr_index(id, state.smartval);
1845 if (i < 0) {
1846 PrintOut(LOG_INFO, "Device: %s, can't monitor %s count - no Attribute %d\n",
1847 cfg.name.c_str(), msg, id);
1848 return false;
1849 }
1850
1851 // Check value
1852 uint64_t rawval = ata_get_attr_raw_value(state.smartval.vendor_attributes[i],
1853 cfg.attribute_defs);
1854 if (rawval >= (state.num_sectors ? state.num_sectors : 0xffffffffULL)) {
1855 PrintOut(LOG_INFO, "Device: %s, ignoring %s count - bogus Attribute %d value %" PRIu64 " (0x%" PRIx64 ")\n",
1856 cfg.name.c_str(), msg, id, rawval, rawval);
1857 return false;
1858 }
1859
1860 return true;
1861 }
1862
1863 // Called by ATA/SCSI/NVMeDeviceScan() after successful device check
finish_device_scan(dev_config & cfg,dev_state & state)1864 static void finish_device_scan(dev_config & cfg, dev_state & state)
1865 {
1866 // Set cfg.emailfreq if user hasn't set it
1867 if ((!cfg.emailaddress.empty() || !cfg.emailcmdline.empty()) && !cfg.emailfreq) {
1868 // Avoid that emails are suppressed forever due to state persistence
1869 if (cfg.state_file.empty())
1870 cfg.emailfreq = 1; // '-M once'
1871 else
1872 cfg.emailfreq = 2; // '-M daily'
1873 }
1874
1875 // Start self-test regex check now if time was not read from state file
1876 if (!cfg.test_regex.empty() && !state.scheduled_test_next_check)
1877 state.scheduled_test_next_check = time(0);
1878 }
1879
1880 // Common function to format result message for ATA setting
format_set_result_msg(std::string & msg,const char * name,bool ok,int set_option=0,bool has_value=false)1881 static void format_set_result_msg(std::string & msg, const char * name, bool ok,
1882 int set_option = 0, bool has_value = false)
1883 {
1884 if (!msg.empty())
1885 msg += ", ";
1886 msg += name;
1887 if (!ok)
1888 msg += ":--";
1889 else if (set_option < 0)
1890 msg += ":off";
1891 else if (has_value)
1892 msg += strprintf(":%d", set_option-1);
1893 else if (set_option > 0)
1894 msg += ":on";
1895 }
1896
1897 // Return true and print message if CFG.dev_idinfo is already in PREV_CFGS
is_duplicate_dev_idinfo(const dev_config & cfg,const dev_config_vector & prev_cfgs)1898 static bool is_duplicate_dev_idinfo(const dev_config & cfg, const dev_config_vector & prev_cfgs)
1899 {
1900 if (!cfg.id_is_unique)
1901 return false;
1902
1903 for (unsigned i = 0; i < prev_cfgs.size(); i++) {
1904 if (!prev_cfgs[i].id_is_unique)
1905 continue;
1906 if (cfg.dev_idinfo != prev_cfgs[i].dev_idinfo)
1907 continue;
1908
1909 PrintOut(LOG_INFO, "Device: %s, same identity as %s, ignored\n",
1910 cfg.dev_name.c_str(), prev_cfgs[i].dev_name.c_str());
1911 return true;
1912 }
1913
1914 return false;
1915 }
1916
// Passed to ata_read_identity() by ATADeviceScan().  Fixed to false
// until a directive exists to change it.
// TODO: Add '-F swapid' directive
static const bool fix_swapped_id = false;
1919
1920 // scan to see what ata devices there are, and if they support SMART
ATADeviceScan(dev_config & cfg,dev_state & state,ata_device * atadev,const dev_config_vector * prev_cfgs)1921 static int ATADeviceScan(dev_config & cfg, dev_state & state, ata_device * atadev,
1922 const dev_config_vector * prev_cfgs)
1923 {
1924 int supported=0;
1925 struct ata_identify_device drive;
1926 const char *name = cfg.name.c_str();
1927 int retid;
1928
1929 // Device must be open
1930
1931 // Get drive identity structure
1932 if ((retid = ata_read_identity(atadev, &drive, fix_swapped_id))) {
1933 if (retid<0)
1934 // Unable to read Identity structure
1935 PrintOut(LOG_INFO,"Device: %s, not ATA, no IDENTIFY DEVICE Structure\n",name);
1936 else
1937 PrintOut(LOG_INFO,"Device: %s, packet devices [this device %s] not SMART capable\n",
1938 name, packetdevicetype(retid-1));
1939 CloseDevice(atadev, name);
1940 return 2;
1941 }
1942
1943 // Get drive identity, size and rotation rate (HDD/SSD)
1944 char model[40+1], serial[20+1], firmware[8+1];
1945 ata_format_id_string(model, drive.model, sizeof(model)-1);
1946 ata_format_id_string(serial, drive.serial_no, sizeof(serial)-1);
1947 ata_format_id_string(firmware, drive.fw_rev, sizeof(firmware)-1);
1948
1949 ata_size_info sizes;
1950 ata_get_size_info(&drive, sizes);
1951 state.num_sectors = sizes.sectors;
1952 cfg.dev_rpm = ata_get_rotation_rate(&drive);
1953
1954 char wwn[64]; wwn[0] = 0;
1955 unsigned oui = 0; uint64_t unique_id = 0;
1956 int naa = ata_get_wwn(&drive, oui, unique_id);
1957 if (naa >= 0)
1958 snprintf(wwn, sizeof(wwn), "WWN:%x-%06x-%09" PRIx64 ", ", naa, oui, unique_id);
1959
1960 // Format device id string for warning emails
1961 char cap[32];
1962 cfg.dev_idinfo = strprintf("%s, S/N:%s, %sFW:%s, %s", model, serial, wwn, firmware,
1963 format_capacity(cap, sizeof(cap), sizes.capacity, "."));
1964 cfg.id_is_unique = true; // TODO: Check serial?
1965
1966 PrintOut(LOG_INFO, "Device: %s, %s\n", name, cfg.dev_idinfo.c_str());
1967
1968 // Check for duplicates
1969 if (prev_cfgs && is_duplicate_dev_idinfo(cfg, *prev_cfgs)) {
1970 CloseDevice(atadev, name);
1971 return 1;
1972 }
1973
1974 // Show if device in database, and use preset vendor attribute
1975 // options unless user has requested otherwise.
1976 if (cfg.ignorepresets)
1977 PrintOut(LOG_INFO, "Device: %s, smartd database not searched (Directive: -P ignore).\n", name);
1978 else {
1979 // Apply vendor specific presets, print warning if present
1980 const drive_settings * dbentry = lookup_drive_apply_presets(
1981 &drive, cfg.attribute_defs, cfg.firmwarebugs);
1982 if (!dbentry)
1983 PrintOut(LOG_INFO, "Device: %s, not found in smartd database.\n", name);
1984 else {
1985 PrintOut(LOG_INFO, "Device: %s, found in smartd database%s%s\n",
1986 name, (*dbentry->modelfamily ? ": " : "."), (*dbentry->modelfamily ? dbentry->modelfamily : ""));
1987 if (*dbentry->warningmsg)
1988 PrintOut(LOG_CRIT, "Device: %s, WARNING: %s\n", name, dbentry->warningmsg);
1989 }
1990 }
1991
1992 // Check for ATA Security LOCK
1993 unsigned short word128 = drive.words088_255[128-88];
1994 bool locked = ((word128 & 0x0007) == 0x0007); // LOCKED|ENABLED|SUPPORTED
1995 if (locked)
1996 PrintOut(LOG_INFO, "Device: %s, ATA Security is **LOCKED**\n", name);
1997
1998 // Set default '-C 197[+]' if no '-C ID' is specified.
1999 if (!cfg.curr_pending_set)
2000 cfg.curr_pending_id = get_unc_attr_id(false, cfg.attribute_defs, cfg.curr_pending_incr);
2001 // Set default '-U 198[+]' if no '-U ID' is specified.
2002 if (!cfg.offl_pending_set)
2003 cfg.offl_pending_id = get_unc_attr_id(true, cfg.attribute_defs, cfg.offl_pending_incr);
2004
2005 // If requested, show which presets would be used for this drive
2006 if (cfg.showpresets) {
2007 int savedebugmode=debugmode;
2008 PrintOut(LOG_INFO, "Device %s: presets are:\n", name);
2009 if (!debugmode)
2010 debugmode=2;
2011 show_presets(&drive);
2012 debugmode=savedebugmode;
2013 }
2014
2015 // see if drive supports SMART
2016 supported=ataSmartSupport(&drive);
2017 if (supported!=1) {
2018 if (supported==0)
2019 // drive does NOT support SMART
2020 PrintOut(LOG_INFO,"Device: %s, lacks SMART capability\n",name);
2021 else
2022 // can't tell if drive supports SMART
2023 PrintOut(LOG_INFO,"Device: %s, ATA IDENTIFY DEVICE words 82-83 don't specify if SMART capable.\n",name);
2024
2025 // should we proceed anyway?
2026 if (cfg.permissive) {
2027 PrintOut(LOG_INFO,"Device: %s, proceeding since '-T permissive' Directive given.\n",name);
2028 }
2029 else {
2030 PrintOut(LOG_INFO,"Device: %s, to proceed anyway, use '-T permissive' Directive.\n",name);
2031 CloseDevice(atadev, name);
2032 return 2;
2033 }
2034 }
2035
2036 if (ataEnableSmart(atadev)) {
2037 // Enable SMART command has failed
2038 PrintOut(LOG_INFO,"Device: %s, could not enable SMART capability\n",name);
2039
2040 if (ataIsSmartEnabled(&drive) <= 0) {
2041 if (!cfg.permissive) {
2042 PrintOut(LOG_INFO, "Device: %s, to proceed anyway, use '-T permissive' Directive.\n", name);
2043 CloseDevice(atadev, name);
2044 return 2;
2045 }
2046 PrintOut(LOG_INFO, "Device: %s, proceeding since '-T permissive' Directive given.\n", name);
2047 }
2048 else {
2049 PrintOut(LOG_INFO, "Device: %s, proceeding since SMART is already enabled\n", name);
2050 }
2051 }
2052
2053 // disable device attribute autosave...
2054 if (cfg.autosave==1) {
2055 if (ataDisableAutoSave(atadev))
2056 PrintOut(LOG_INFO,"Device: %s, could not disable SMART Attribute Autosave.\n",name);
2057 else
2058 PrintOut(LOG_INFO,"Device: %s, disabled SMART Attribute Autosave.\n",name);
2059 }
2060
2061 // or enable device attribute autosave
2062 if (cfg.autosave==2) {
2063 if (ataEnableAutoSave(atadev))
2064 PrintOut(LOG_INFO,"Device: %s, could not enable SMART Attribute Autosave.\n",name);
2065 else
2066 PrintOut(LOG_INFO,"Device: %s, enabled SMART Attribute Autosave.\n",name);
2067 }
2068
2069 // capability check: SMART status
2070 if (cfg.smartcheck && ataSmartStatus2(atadev) == -1) {
2071 PrintOut(LOG_INFO,"Device: %s, not capable of SMART Health Status check\n",name);
2072 cfg.smartcheck = false;
2073 }
2074
2075 // capability check: Read smart values and thresholds. Note that
2076 // smart values are ALSO needed even if we ONLY want to know if the
2077 // device is self-test log or error-log capable! After ATA-5, this
2078 // information was ALSO reproduced in the IDENTIFY DEVICE response,
2079 // but sadly not for ATA-5. Sigh.
2080
2081 // do we need to get SMART data?
2082 bool smart_val_ok = false;
2083 if ( cfg.autoofflinetest || cfg.selftest
2084 || cfg.errorlog || cfg.xerrorlog
2085 || cfg.offlinests || cfg.selfteststs
2086 || cfg.usagefailed || cfg.prefail || cfg.usage
2087 || cfg.tempdiff || cfg.tempinfo || cfg.tempcrit
2088 || cfg.curr_pending_id || cfg.offl_pending_id ) {
2089
2090 if (ataReadSmartValues(atadev, &state.smartval)) {
2091 PrintOut(LOG_INFO, "Device: %s, Read SMART Values failed\n", name);
2092 cfg.usagefailed = cfg.prefail = cfg.usage = false;
2093 cfg.tempdiff = cfg.tempinfo = cfg.tempcrit = 0;
2094 cfg.curr_pending_id = cfg.offl_pending_id = 0;
2095 }
2096 else {
2097 smart_val_ok = true;
2098 if (ataReadSmartThresholds(atadev, &state.smartthres)) {
2099 PrintOut(LOG_INFO, "Device: %s, Read SMART Thresholds failed%s\n",
2100 name, (cfg.usagefailed ? ", ignoring -f Directive" : ""));
2101 cfg.usagefailed = false;
2102 // Let ata_get_attr_state() return ATTRSTATE_NO_THRESHOLD:
2103 memset(&state.smartthres, 0, sizeof(state.smartthres));
2104 }
2105 }
2106
2107 // see if the necessary Attribute is there to monitor offline or
2108 // current pending sectors or temperature
2109 if ( cfg.curr_pending_id
2110 && !check_pending_id(cfg, state, cfg.curr_pending_id,
2111 "Current_Pending_Sector"))
2112 cfg.curr_pending_id = 0;
2113
2114 if ( cfg.offl_pending_id
2115 && !check_pending_id(cfg, state, cfg.offl_pending_id,
2116 "Offline_Uncorrectable"))
2117 cfg.offl_pending_id = 0;
2118
2119 if ( (cfg.tempdiff || cfg.tempinfo || cfg.tempcrit)
2120 && !ata_return_temperature_value(&state.smartval, cfg.attribute_defs)) {
2121 PrintOut(LOG_INFO, "Device: %s, can't monitor Temperature, ignoring -W %d,%d,%d\n",
2122 name, cfg.tempdiff, cfg.tempinfo, cfg.tempcrit);
2123 cfg.tempdiff = cfg.tempinfo = cfg.tempcrit = 0;
2124 }
2125
2126 // Report ignored '-r' or '-R' directives
2127 for (int id = 1; id <= 255; id++) {
2128 if (cfg.monitor_attr_flags.is_set(id, MONITOR_RAW_PRINT)) {
2129 char opt = (!cfg.monitor_attr_flags.is_set(id, MONITOR_RAW) ? 'r' : 'R');
2130 const char * excl = (cfg.monitor_attr_flags.is_set(id,
2131 (opt == 'r' ? MONITOR_AS_CRIT : MONITOR_RAW_AS_CRIT)) ? "!" : "");
2132
2133 int idx = ata_find_attr_index(id, state.smartval);
2134 if (idx < 0)
2135 PrintOut(LOG_INFO,"Device: %s, no Attribute %d, ignoring -%c %d%s\n", name, id, opt, id, excl);
2136 else {
2137 bool prefail = !!ATTRIBUTE_FLAGS_PREFAILURE(state.smartval.vendor_attributes[idx].flags);
2138 if (!((prefail && cfg.prefail) || (!prefail && cfg.usage)))
2139 PrintOut(LOG_INFO,"Device: %s, not monitoring %s Attributes, ignoring -%c %d%s\n", name,
2140 (prefail ? "Prefailure" : "Usage"), opt, id, excl);
2141 }
2142 }
2143 }
2144 }
2145
2146 // enable/disable automatic on-line testing
2147 if (cfg.autoofflinetest) {
2148 // is this an enable or disable request?
2149 const char *what=(cfg.autoofflinetest==1)?"disable":"enable";
2150 if (!smart_val_ok)
2151 PrintOut(LOG_INFO,"Device: %s, could not %s SMART Automatic Offline Testing.\n",name, what);
2152 else {
2153 // if command appears unsupported, issue a warning...
2154 if (!isSupportAutomaticTimer(&state.smartval))
2155 PrintOut(LOG_INFO,"Device: %s, SMART Automatic Offline Testing unsupported...\n",name);
2156 // ... but then try anyway
2157 if ((cfg.autoofflinetest==1)?ataDisableAutoOffline(atadev):ataEnableAutoOffline(atadev))
2158 PrintOut(LOG_INFO,"Device: %s, %s SMART Automatic Offline Testing failed.\n", name, what);
2159 else
2160 PrintOut(LOG_INFO,"Device: %s, %sd SMART Automatic Offline Testing.\n", name, what);
2161 }
2162 }
2163
2164 // Read log directories if required for capability check
2165 ata_smart_log_directory smart_logdir, gp_logdir;
2166 bool smart_logdir_ok = false, gp_logdir_ok = false;
2167
2168 if ( isGeneralPurposeLoggingCapable(&drive)
2169 && (cfg.errorlog || cfg.selftest)
2170 && !cfg.firmwarebugs.is_set(BUG_NOLOGDIR)) {
2171 if (!ataReadLogDirectory(atadev, &smart_logdir, false))
2172 smart_logdir_ok = true;
2173 }
2174
2175 if (cfg.xerrorlog && !cfg.firmwarebugs.is_set(BUG_NOLOGDIR)) {
2176 if (!ataReadLogDirectory(atadev, &gp_logdir, true))
2177 gp_logdir_ok = true;
2178 }
2179
2180 // capability check: self-test-log
2181 state.selflogcount = 0; state.selfloghour = 0;
2182 if (cfg.selftest) {
2183 int retval;
2184 if (!( cfg.permissive
2185 || ( smart_logdir_ok && smart_logdir.entry[0x06-1].numsectors)
2186 || (!smart_logdir_ok && smart_val_ok && isSmartTestLogCapable(&state.smartval, &drive)))) {
2187 PrintOut(LOG_INFO, "Device: %s, no SMART Self-test Log, ignoring -l selftest (override with -T permissive)\n", name);
2188 cfg.selftest = false;
2189 }
2190 else if ((retval = SelfTestErrorCount(atadev, name, cfg.firmwarebugs)) < 0) {
2191 PrintOut(LOG_INFO, "Device: %s, no SMART Self-test Log, ignoring -l selftest\n", name);
2192 cfg.selftest = false;
2193 }
2194 else {
2195 state.selflogcount=SELFTEST_ERRORCOUNT(retval);
2196 state.selfloghour =SELFTEST_ERRORHOURS(retval);
2197 }
2198 }
2199
2200 // capability check: ATA error log
2201 state.ataerrorcount = 0;
2202 if (cfg.errorlog) {
2203 int errcnt1;
2204 if (!( cfg.permissive
2205 || ( smart_logdir_ok && smart_logdir.entry[0x01-1].numsectors)
2206 || (!smart_logdir_ok && smart_val_ok && isSmartErrorLogCapable(&state.smartval, &drive)))) {
2207 PrintOut(LOG_INFO, "Device: %s, no SMART Error Log, ignoring -l error (override with -T permissive)\n", name);
2208 cfg.errorlog = false;
2209 }
2210 else if ((errcnt1 = read_ata_error_count(atadev, name, cfg.firmwarebugs, false)) < 0) {
2211 PrintOut(LOG_INFO, "Device: %s, no SMART Error Log, ignoring -l error\n", name);
2212 cfg.errorlog = false;
2213 }
2214 else
2215 state.ataerrorcount = errcnt1;
2216 }
2217
2218 if (cfg.xerrorlog) {
2219 int errcnt2;
2220 if (!( cfg.permissive || cfg.firmwarebugs.is_set(BUG_NOLOGDIR)
2221 || (gp_logdir_ok && gp_logdir.entry[0x03-1].numsectors) )) {
2222 PrintOut(LOG_INFO, "Device: %s, no Extended Comprehensive SMART Error Log, ignoring -l xerror (override with -T permissive)\n",
2223 name);
2224 cfg.xerrorlog = false;
2225 }
2226 else if ((errcnt2 = read_ata_error_count(atadev, name, cfg.firmwarebugs, true)) < 0) {
2227 PrintOut(LOG_INFO, "Device: %s, no Extended Comprehensive SMART Error Log, ignoring -l xerror\n", name);
2228 cfg.xerrorlog = false;
2229 }
2230 else if (cfg.errorlog && state.ataerrorcount != errcnt2) {
2231 PrintOut(LOG_INFO, "Device: %s, SMART Error Logs report different error counts: %d != %d\n",
2232 name, state.ataerrorcount, errcnt2);
2233 // Record max error count
2234 if (errcnt2 > state.ataerrorcount)
2235 state.ataerrorcount = errcnt2;
2236 }
2237 else
2238 state.ataerrorcount = errcnt2;
2239 }
2240
2241 // capability check: self-test and offline data collection status
2242 if (cfg.offlinests || cfg.selfteststs) {
2243 if (!(cfg.permissive || (smart_val_ok && state.smartval.offline_data_collection_capability))) {
2244 if (cfg.offlinests)
2245 PrintOut(LOG_INFO, "Device: %s, no SMART Offline Data Collection capability, ignoring -l offlinests (override with -T permissive)\n", name);
2246 if (cfg.selfteststs)
2247 PrintOut(LOG_INFO, "Device: %s, no SMART Self-test capability, ignoring -l selfteststs (override with -T permissive)\n", name);
2248 cfg.offlinests = cfg.selfteststs = false;
2249 }
2250 }
2251
2252 // capabilities check -- does it support powermode?
2253 if (cfg.powermode) {
2254 int powermode = ataCheckPowerMode(atadev);
2255
2256 if (-1 == powermode) {
2257 PrintOut(LOG_CRIT, "Device: %s, no ATA CHECK POWER STATUS support, ignoring -n Directive\n", name);
2258 cfg.powermode=0;
2259 }
2260 else if (powermode!=0x00 && powermode!=0x01
2261 && powermode!=0x40 && powermode!=0x41
2262 && powermode!=0x80 && powermode!=0x81 && powermode!=0x82 && powermode!=0x83
2263 && powermode!=0xff) {
2264 PrintOut(LOG_CRIT, "Device: %s, CHECK POWER STATUS returned %d, not ATA compliant, ignoring -n Directive\n",
2265 name, powermode);
2266 cfg.powermode=0;
2267 }
2268 }
2269
2270 // Apply ATA settings
2271 std::string msg;
2272
2273 if (cfg.set_aam)
2274 format_set_result_msg(msg, "AAM", (cfg.set_aam > 0 ?
2275 ata_set_features(atadev, ATA_ENABLE_AAM, cfg.set_aam-1) :
2276 ata_set_features(atadev, ATA_DISABLE_AAM)), cfg.set_aam, true);
2277
2278 if (cfg.set_apm)
2279 format_set_result_msg(msg, "APM", (cfg.set_apm > 0 ?
2280 ata_set_features(atadev, ATA_ENABLE_APM, cfg.set_apm-1) :
2281 ata_set_features(atadev, ATA_DISABLE_APM)), cfg.set_apm, true);
2282
2283 if (cfg.set_lookahead)
2284 format_set_result_msg(msg, "Rd-ahead", ata_set_features(atadev,
2285 (cfg.set_lookahead > 0 ? ATA_ENABLE_READ_LOOK_AHEAD : ATA_DISABLE_READ_LOOK_AHEAD)),
2286 cfg.set_lookahead);
2287
2288 if (cfg.set_wcache)
2289 format_set_result_msg(msg, "Wr-cache", ata_set_features(atadev,
2290 (cfg.set_wcache > 0? ATA_ENABLE_WRITE_CACHE : ATA_DISABLE_WRITE_CACHE)), cfg.set_wcache);
2291
2292 if (cfg.set_dsn)
2293 format_set_result_msg(msg, "DSN", ata_set_features(atadev,
2294 ATA_ENABLE_DISABLE_DSN, (cfg.set_dsn > 0 ? 0x1 : 0x2)));
2295
2296 if (cfg.set_security_freeze)
2297 format_set_result_msg(msg, "Security freeze",
2298 ata_nodata_command(atadev, ATA_SECURITY_FREEZE_LOCK));
2299
2300 if (cfg.set_standby)
2301 format_set_result_msg(msg, "Standby",
2302 ata_nodata_command(atadev, ATA_IDLE, cfg.set_standby-1), cfg.set_standby, true);
2303
2304 // Report as one log entry
2305 if (!msg.empty())
2306 PrintOut(LOG_INFO, "Device: %s, ATA settings applied: %s\n", name, msg.c_str());
2307
2308 // set SCT Error Recovery Control if requested
2309 if (cfg.sct_erc_set) {
2310 if (!isSCTErrorRecoveryControlCapable(&drive))
2311 PrintOut(LOG_INFO, "Device: %s, no SCT Error Recovery Control support, ignoring -l scterc\n",
2312 name);
2313 else if (locked)
2314 PrintOut(LOG_INFO, "Device: %s, no SCT support if ATA Security is LOCKED, ignoring -l scterc\n",
2315 name);
2316 else if ( ataSetSCTErrorRecoveryControltime(atadev, 1, cfg.sct_erc_readtime )
2317 || ataSetSCTErrorRecoveryControltime(atadev, 2, cfg.sct_erc_writetime))
2318 PrintOut(LOG_INFO, "Device: %s, set of SCT Error Recovery Control failed\n", name);
2319 else
2320 PrintOut(LOG_INFO, "Device: %s, SCT Error Recovery Control set to: Read: %u, Write: %u\n",
2321 name, cfg.sct_erc_readtime, cfg.sct_erc_writetime);
2322 }
2323
2324 // If no tests available or selected, return
2325 if (!( cfg.smartcheck || cfg.selftest
2326 || cfg.errorlog || cfg.xerrorlog
2327 || cfg.offlinests || cfg.selfteststs
2328 || cfg.usagefailed || cfg.prefail || cfg.usage
2329 || cfg.tempdiff || cfg.tempinfo || cfg.tempcrit)) {
2330 CloseDevice(atadev, name);
2331 return 3;
2332 }
2333
2334 // tell user we are registering device
2335 PrintOut(LOG_INFO,"Device: %s, is SMART capable. Adding to \"monitor\" list.\n",name);
2336
2337 // close file descriptor
2338 CloseDevice(atadev, name);
2339
2340 if (!state_path_prefix.empty() || !attrlog_path_prefix.empty()) {
2341 // Build file name for state file
2342 std::replace_if(model, model+strlen(model), not_allowed_in_filename, '_');
2343 std::replace_if(serial, serial+strlen(serial), not_allowed_in_filename, '_');
2344 if (!state_path_prefix.empty()) {
2345 cfg.state_file = strprintf("%s%s-%s.ata.state", state_path_prefix.c_str(), model, serial);
2346 // Read previous state
2347 if (read_dev_state(cfg.state_file.c_str(), state)) {
2348 PrintOut(LOG_INFO, "Device: %s, state read from %s\n", name, cfg.state_file.c_str());
2349 // Copy ATA attribute values to temp state
2350 state.update_temp_state();
2351 }
2352 }
2353 if (!attrlog_path_prefix.empty())
2354 cfg.attrlog_file = strprintf("%s%s-%s.ata.csv", attrlog_path_prefix.c_str(), model, serial);
2355 }
2356
2357 finish_device_scan(cfg, state);
2358
2359 return 0;
2360 }
2361
2362 // on success, return 0. On failure, return >0. Never return <0,
2363 // please.
SCSIDeviceScan(dev_config & cfg,dev_state & state,scsi_device * scsidev,const dev_config_vector * prev_cfgs)2364 static int SCSIDeviceScan(dev_config & cfg, dev_state & state, scsi_device * scsidev,
2365 const dev_config_vector * prev_cfgs)
2366 {
2367 int err, req_len, avail_len, version, len;
2368 const char *device = cfg.name.c_str();
2369 struct scsi_iec_mode_page iec;
2370 uint8_t tBuf[64];
2371 uint8_t inqBuf[96];
2372 uint8_t vpdBuf[252];
2373 char lu_id[64], serial[256], vendor[40], model[40];
2374
2375 // Device must be open
2376 memset(inqBuf, 0, 96);
2377 req_len = 36;
2378 if ((err = scsiStdInquiry(scsidev, inqBuf, req_len))) {
2379 /* Marvell controllers fail on a 36 bytes StdInquiry, but 64 suffices */
2380 req_len = 64;
2381 if ((err = scsiStdInquiry(scsidev, inqBuf, req_len))) {
2382 PrintOut(LOG_INFO, "Device: %s, Both 36 and 64 byte INQUIRY failed; "
2383 "skip device\n", device);
2384 return 2;
2385 }
2386 }
2387 version = (inqBuf[2] & 0x7f); /* Accept old ISO/IEC 9316:1995 variants */
2388
2389 avail_len = inqBuf[4] + 5;
2390 len = (avail_len < req_len) ? avail_len : req_len;
2391 if (len < 36) {
2392 PrintOut(LOG_INFO, "Device: %s, INQUIRY response less than 36 bytes; "
2393 "skip device\n", device);
2394 return 2;
2395 }
2396
2397 int pdt = inqBuf[0] & 0x1f;
2398
2399 if (! ((0 == pdt) || (4 == pdt) || (5 == pdt) || (7 == pdt) ||
2400 (0xe == pdt))) {
2401 PrintOut(LOG_INFO, "Device: %s, not a disk like device [PDT=0x%x], "
2402 "skip\n", device, pdt);
2403 return 2;
2404 }
2405
2406 if (supported_vpd_pages_p) {
2407 delete supported_vpd_pages_p;
2408 supported_vpd_pages_p = NULL;
2409 }
2410 supported_vpd_pages_p = new supported_vpd_pages(scsidev);
2411
2412 lu_id[0] = '\0';
2413 if ((version >= 0x3) && (version < 0x8)) {
2414 /* SPC to SPC-5 */
2415 if (0 == scsiInquiryVpd(scsidev, SCSI_VPD_DEVICE_IDENTIFICATION,
2416 vpdBuf, sizeof(vpdBuf))) {
2417 len = vpdBuf[3];
2418 scsi_decode_lu_dev_id(vpdBuf + 4, len, lu_id, sizeof(lu_id), NULL);
2419 }
2420 }
2421 serial[0] = '\0';
2422 if (0 == scsiInquiryVpd(scsidev, SCSI_VPD_UNIT_SERIAL_NUMBER,
2423 vpdBuf, sizeof(vpdBuf))) {
2424 len = vpdBuf[3];
2425 vpdBuf[4 + len] = '\0';
2426 scsi_format_id_string(serial, &vpdBuf[4], len);
2427 }
2428
2429 char si_str[64];
2430 struct scsi_readcap_resp srr;
2431 uint64_t capacity = scsiGetSize(scsidev, scsidev->use_rcap16(), &srr);
2432
2433 if (capacity)
2434 format_capacity(si_str, sizeof(si_str), capacity, ".");
2435 else
2436 si_str[0] = '\0';
2437
2438 // Format device id string for warning emails
2439 cfg.dev_idinfo = strprintf("[%.8s %.16s %.4s]%s%s%s%s%s%s",
2440 (char *)&inqBuf[8], (char *)&inqBuf[16], (char *)&inqBuf[32],
2441 (lu_id[0] ? ", lu id: " : ""), (lu_id[0] ? lu_id : ""),
2442 (serial[0] ? ", S/N: " : ""), (serial[0] ? serial : ""),
2443 (si_str[0] ? ", " : ""), (si_str[0] ? si_str : ""));
2444 cfg.id_is_unique = (lu_id[0] || serial[0]);
2445
2446 // format "model" string
2447 scsi_format_id_string(vendor, &inqBuf[8], 8);
2448 scsi_format_id_string(model, &inqBuf[16], 16);
2449 PrintOut(LOG_INFO, "Device: %s, %s\n", device, cfg.dev_idinfo.c_str());
2450
2451 // Check for duplicates
2452 if (prev_cfgs && is_duplicate_dev_idinfo(cfg, *prev_cfgs)) {
2453 CloseDevice(scsidev, device);
2454 return 1;
2455 }
2456
2457 // check that device is ready for commands. IE stores its stuff on
2458 // the media.
2459 if ((err = scsiTestUnitReady(scsidev))) {
2460 if (SIMPLE_ERR_NOT_READY == err)
2461 PrintOut(LOG_INFO, "Device: %s, NOT READY (e.g. spun down); skip device\n", device);
2462 else if (SIMPLE_ERR_NO_MEDIUM == err)
2463 PrintOut(LOG_INFO, "Device: %s, NO MEDIUM present; skip device\n", device);
2464 else if (SIMPLE_ERR_BECOMING_READY == err)
2465 PrintOut(LOG_INFO, "Device: %s, BECOMING (but not yet) READY; skip device\n", device);
2466 else
2467 PrintOut(LOG_CRIT, "Device: %s, failed Test Unit Ready [err=%d]\n", device, err);
2468 CloseDevice(scsidev, device);
2469 return 2;
2470 }
2471
2472 // Badly-conforming USB storage devices may fail this check.
2473 // The response to the following IE mode page fetch (current and
2474 // changeable values) is carefully examined. It has been found
2475 // that various USB devices that malform the response will lock up
2476 // if asked for a log page (e.g. temperature) so it is best to
2477 // bail out now.
2478 if (!(err = scsiFetchIECmpage(scsidev, &iec, state.modese_len)))
2479 state.modese_len = iec.modese_len;
2480 else if (SIMPLE_ERR_BAD_FIELD == err)
2481 ; /* continue since it is reasonable not to support IE mpage */
2482 else { /* any other error (including malformed response) unreasonable */
2483 PrintOut(LOG_INFO,
2484 "Device: %s, Bad IEC (SMART) mode page, err=%d, skip device\n",
2485 device, err);
2486 CloseDevice(scsidev, device);
2487 return 3;
2488 }
2489
2490 // N.B. The following is passive (i.e. it doesn't attempt to turn on
2491 // smart if it is off). This may change to be the same as the ATA side.
2492 if (!scsi_IsExceptionControlEnabled(&iec)) {
2493 PrintOut(LOG_INFO, "Device: %s, IE (SMART) not enabled, skip device\n"
2494 "Try 'smartctl -s on %s' to turn on SMART features\n",
2495 device, device);
2496 CloseDevice(scsidev, device);
2497 return 3;
2498 }
2499
2500 // Flag that certain log pages are supported (information may be
2501 // available from other sources).
2502 if (0 == scsiLogSense(scsidev, SUPPORTED_LPAGES, 0, tBuf, sizeof(tBuf), 0) ||
2503 0 == scsiLogSense(scsidev, SUPPORTED_LPAGES, 0, tBuf, sizeof(tBuf), 68))
2504 /* workaround for the bug #678 on ST8000NM0075/E001. Up to 64 pages + 4b header */
2505 {
2506 for (int k = 4; k < tBuf[3] + LOGPAGEHDRSIZE; ++k) {
2507 switch (tBuf[k]) {
2508 case TEMPERATURE_LPAGE:
2509 state.TempPageSupported = 1;
2510 break;
2511 case IE_LPAGE:
2512 state.SmartPageSupported = 1;
2513 break;
2514 case READ_ERROR_COUNTER_LPAGE:
2515 state.ReadECounterPageSupported = 1;
2516 break;
2517 case WRITE_ERROR_COUNTER_LPAGE:
2518 state.WriteECounterPageSupported = 1;
2519 break;
2520 case VERIFY_ERROR_COUNTER_LPAGE:
2521 state.VerifyECounterPageSupported = 1;
2522 break;
2523 case NON_MEDIUM_ERROR_LPAGE:
2524 state.NonMediumErrorPageSupported = 1;
2525 break;
2526 default:
2527 break;
2528 }
2529 }
2530 }
2531
2532 // Check if scsiCheckIE() is going to work
2533 {
2534 uint8_t asc = 0;
2535 uint8_t ascq = 0;
2536 uint8_t currenttemp = 0;
2537 uint8_t triptemp = 0;
2538
2539 if (scsiCheckIE(scsidev, state.SmartPageSupported, state.TempPageSupported,
2540 &asc, &ascq, ¤ttemp, &triptemp)) {
2541 PrintOut(LOG_INFO, "Device: %s, unexpectedly failed to read SMART values\n", device);
2542 state.SuppressReport = 1;
2543 }
2544 if ( (state.SuppressReport || !currenttemp)
2545 && (cfg.tempdiff || cfg.tempinfo || cfg.tempcrit)) {
2546 PrintOut(LOG_INFO, "Device: %s, can't monitor Temperature, ignoring -W %d,%d,%d\n",
2547 device, cfg.tempdiff, cfg.tempinfo, cfg.tempcrit);
2548 cfg.tempdiff = cfg.tempinfo = cfg.tempcrit = 0;
2549 }
2550 }
2551
2552 // capability check: self-test-log
2553 if (cfg.selftest){
2554 int retval = scsiCountFailedSelfTests(scsidev, 0);
2555 if (retval<0) {
2556 // no self-test log, turn off monitoring
2557 PrintOut(LOG_INFO, "Device: %s, does not support SMART Self-Test Log.\n", device);
2558 cfg.selftest = false;
2559 state.selflogcount = 0;
2560 state.selfloghour = 0;
2561 }
2562 else {
2563 // register starting values to watch for changes
2564 state.selflogcount=SELFTEST_ERRORCOUNT(retval);
2565 state.selfloghour =SELFTEST_ERRORHOURS(retval);
2566 }
2567 }
2568
2569 // disable autosave (set GLTSD bit)
2570 if (cfg.autosave==1){
2571 if (scsiSetControlGLTSD(scsidev, 1, state.modese_len))
2572 PrintOut(LOG_INFO,"Device: %s, could not disable autosave (set GLTSD bit).\n",device);
2573 else
2574 PrintOut(LOG_INFO,"Device: %s, disabled autosave (set GLTSD bit).\n",device);
2575 }
2576
2577 // or enable autosave (clear GLTSD bit)
2578 if (cfg.autosave==2){
2579 if (scsiSetControlGLTSD(scsidev, 0, state.modese_len))
2580 PrintOut(LOG_INFO,"Device: %s, could not enable autosave (clear GLTSD bit).\n",device);
2581 else
2582 PrintOut(LOG_INFO,"Device: %s, enabled autosave (cleared GLTSD bit).\n",device);
2583 }
2584
2585 // tell user we are registering device
2586 PrintOut(LOG_INFO, "Device: %s, is SMART capable. Adding to \"monitor\" list.\n", device);
2587
2588 // Make sure that init_standby_check() ignores SCSI devices
2589 cfg.offlinests_ns = cfg.selfteststs_ns = false;
2590
2591 // close file descriptor
2592 CloseDevice(scsidev, device);
2593
2594 if (!state_path_prefix.empty() || !attrlog_path_prefix.empty()) {
2595 // Build file name for state file
2596 std::replace_if(model, model+strlen(model), not_allowed_in_filename, '_');
2597 std::replace_if(serial, serial+strlen(serial), not_allowed_in_filename, '_');
2598 if (!state_path_prefix.empty()) {
2599 cfg.state_file = strprintf("%s%s-%s-%s.scsi.state", state_path_prefix.c_str(), vendor, model, serial);
2600 // Read previous state
2601 if (read_dev_state(cfg.state_file.c_str(), state)) {
2602 PrintOut(LOG_INFO, "Device: %s, state read from %s\n", device, cfg.state_file.c_str());
2603 // Copy ATA attribute values to temp state
2604 state.update_temp_state();
2605 }
2606 }
2607 if (!attrlog_path_prefix.empty())
2608 cfg.attrlog_file = strprintf("%s%s-%s-%s.scsi.csv", attrlog_path_prefix.c_str(), vendor, model, serial);
2609 }
2610
2611 finish_device_scan(cfg, state);
2612
2613 return 0;
2614 }
2615
// Convert a 128 bit little endian integer to uint64_t.
// Returns the maximum uint64_t value if the number does not fit,
// i.e. if any of the upper 8 bytes is nonzero.
static uint64_t le128_to_uint64(const unsigned char (& val)[16])
{
  // Overflow check on bytes 8..15
  bool overflow = false;
  for (int pos = 8; pos < 16 && !overflow; pos++)
    overflow = (val[pos] != 0);
  if (overflow)
    return ~(uint64_t)0;

  // Assemble bytes 7..0, most significant byte first
  uint64_t result = 0;
  for (int pos = 7; pos >= 0; pos--)
    result = (result << 8) | val[pos];
  return result;
}
2629
2630 // Get max temperature in Kelvin reported in NVMe SMART/Health log.
nvme_get_max_temp_kelvin(const nvme_smart_log & smart_log)2631 static int nvme_get_max_temp_kelvin(const nvme_smart_log & smart_log)
2632 {
2633 int k = (smart_log.temperature[1] << 8) | smart_log.temperature[0];
2634 for (int i = 0; i < 8; i++) {
2635 if (smart_log.temp_sensor[i] > k)
2636 k = smart_log.temp_sensor[i];
2637 }
2638 return k;
2639 }
2640
NVMeDeviceScan(dev_config & cfg,dev_state & state,nvme_device * nvmedev,const dev_config_vector * prev_cfgs)2641 static int NVMeDeviceScan(dev_config & cfg, dev_state & state, nvme_device * nvmedev,
2642 const dev_config_vector * prev_cfgs)
2643 {
2644 const char *name = cfg.name.c_str();
2645
2646 // Device must be open
2647
2648 // Get ID Controller
2649 nvme_id_ctrl id_ctrl;
2650 if (!nvme_read_id_ctrl(nvmedev, id_ctrl)) {
2651 PrintOut(LOG_INFO, "Device: %s, NVMe Identify Controller failed\n", name);
2652 CloseDevice(nvmedev, name);
2653 return 2;
2654 }
2655
2656 // Get drive identity
2657 char model[40+1], serial[20+1], firmware[8+1];
2658 format_char_array(model, id_ctrl.mn);
2659 format_char_array(serial, id_ctrl.sn);
2660 format_char_array(firmware, id_ctrl.fr);
2661
2662 // Format device id string for warning emails
2663 char nsstr[32] = "", capstr[32] = "";
2664 unsigned nsid = nvmedev->get_nsid();
2665 if (nsid != 0xffffffff)
2666 snprintf(nsstr, sizeof(nsstr), ", NSID:%u", nsid);
2667 uint64_t capacity = le128_to_uint64(id_ctrl.tnvmcap);
2668 if (capacity)
2669 format_capacity(capstr, sizeof(capstr), capacity, ".");
2670 cfg.dev_idinfo = strprintf("%s, S/N:%s, FW:%s%s%s%s", model, serial, firmware,
2671 nsstr, (capstr[0] ? ", " : ""), capstr);
2672 cfg.id_is_unique = true; // TODO: Check serial?
2673
2674 PrintOut(LOG_INFO, "Device: %s, %s\n", name, cfg.dev_idinfo.c_str());
2675
2676 // Check for duplicates
2677 if (prev_cfgs && is_duplicate_dev_idinfo(cfg, *prev_cfgs)) {
2678 CloseDevice(nvmedev, name);
2679 return 1;
2680 }
2681
2682 // Read SMART/Health log
2683 nvme_smart_log smart_log;
2684 if (!nvme_read_smart_log(nvmedev, smart_log)) {
2685 PrintOut(LOG_INFO, "Device: %s, failed to read NVMe SMART/Health Information\n", name);
2686 CloseDevice(nvmedev, name);
2687 return 2;
2688 }
2689
2690 // Check temperature sensor support
2691 if (cfg.tempdiff || cfg.tempinfo || cfg.tempcrit) {
2692 if (!nvme_get_max_temp_kelvin(smart_log)) {
2693 PrintOut(LOG_INFO, "Device: %s, no Temperature sensors, ignoring -W %d,%d,%d\n",
2694 name, cfg.tempdiff, cfg.tempinfo, cfg.tempcrit);
2695 cfg.tempdiff = cfg.tempinfo = cfg.tempcrit = 0;
2696 }
2697 }
2698
2699 // Init total error count
2700 if (cfg.errorlog || cfg.xerrorlog) {
2701 state.nvme_err_log_entries = le128_to_uint64(smart_log.num_err_log_entries);
2702 }
2703
2704 // If no supported tests selected, return
2705 if (!( cfg.smartcheck || cfg.errorlog || cfg.xerrorlog
2706 || cfg.tempdiff || cfg.tempinfo || cfg.tempcrit )) {
2707 CloseDevice(nvmedev, name);
2708 return 3;
2709 }
2710
2711 // Tell user we are registering device
2712 PrintOut(LOG_INFO,"Device: %s, is SMART capable. Adding to \"monitor\" list.\n", name);
2713
2714 // Make sure that init_standby_check() ignores NVMe devices
2715 cfg.offlinests_ns = cfg.selfteststs_ns = false;
2716
2717 CloseDevice(nvmedev, name);
2718
2719 if (!state_path_prefix.empty()) {
2720 // Build file name for state file
2721 std::replace_if(model, model+strlen(model), not_allowed_in_filename, '_');
2722 std::replace_if(serial, serial+strlen(serial), not_allowed_in_filename, '_');
2723 nsstr[0] = 0;
2724 if (nsid != 0xffffffff)
2725 snprintf(nsstr, sizeof(nsstr), "-n%u", nsid);
2726 cfg.state_file = strprintf("%s%s-%s%s.nvme.state", state_path_prefix.c_str(), model, serial, nsstr);
2727 // Read previous state
2728 if (read_dev_state(cfg.state_file.c_str(), state))
2729 PrintOut(LOG_INFO, "Device: %s, state read from %s\n", name, cfg.state_file.c_str());
2730 }
2731
2732 finish_device_scan(cfg, state);
2733
2734 return 0;
2735 }
2736
2737 // Open device for next check, return false on error
open_device(const dev_config & cfg,dev_state & state,smart_device * device,const char * type)2738 static bool open_device(const dev_config & cfg, dev_state & state, smart_device * device,
2739 const char * type)
2740 {
2741 const char * name = cfg.name.c_str();
2742
2743 // If user has asked, test the email warning system
2744 if (cfg.emailtest)
2745 MailWarning(cfg, state, 0, "TEST EMAIL from smartd for device: %s", name);
2746
2747 // User may have requested (with the -n Directive) to leave the disk
2748 // alone if it is in idle or standby mode. In this case check the
2749 // power mode first before opening the device for full access,
2750 // and exit without check if disk is reported in standby.
2751 if (device->is_ata() && cfg.powermode && !state.powermodefail && !state.removed) {
2752 // Note that 'is_powered_down()' handles opening the device itself, and
2753 // can be used before calling 'open()' (that's the whole point of 'is_powered_down()'!).
2754 if (device->is_powered_down())
2755 {
2756 // skip at most powerskipmax checks
2757 if (!cfg.powerskipmax || state.powerskipcnt<cfg.powerskipmax) {
2758 // report first only except if state has changed, avoid waking up system disk
2759 if ((!state.powerskipcnt || state.lastpowermodeskipped != -1) && !cfg.powerquiet) {
2760 PrintOut(LOG_INFO, "Device: %s, is in %s mode, suspending checks\n", name, "STANDBY (OS)");
2761 state.lastpowermodeskipped = -1;
2762 }
2763 state.powerskipcnt++;
2764 return false;
2765 }
2766 }
2767 }
2768
2769 // if we can't open device, fail gracefully rather than hard --
2770 // perhaps the next time around we'll be able to open it
2771 if (!device->open()) {
2772 // For removable devices, print error message only once and suppress email
2773 if (!cfg.removable) {
2774 PrintOut(LOG_INFO, "Device: %s, open() of %s device failed: %s\n", name, type, device->get_errmsg());
2775 MailWarning(cfg, state, 9, "Device: %s, unable to open %s device", name, type);
2776 }
2777 else if (!state.removed) {
2778 PrintOut(LOG_INFO, "Device: %s, removed %s device: %s\n", name, type, device->get_errmsg());
2779 state.removed = true;
2780 }
2781 else if (debugmode)
2782 PrintOut(LOG_INFO, "Device: %s, %s device still removed: %s\n", name, type, device->get_errmsg());
2783 return false;
2784 }
2785
2786 if (debugmode)
2787 PrintOut(LOG_INFO,"Device: %s, opened %s device\n", name, type);
2788
2789 if (!cfg.removable)
2790 reset_warning_mail(cfg, state, 9, "open of %s device worked again", type);
2791 else if (state.removed) {
2792 PrintOut(LOG_INFO, "Device: %s, reconnected %s device\n", name, type);
2793 state.removed = false;
2794 }
2795
2796 return true;
2797 }
2798
2799 // If the self-test log has got more self-test errors (or more recent
2800 // self-test errors) recorded, then notify user.
CheckSelfTestLogs(const dev_config & cfg,dev_state & state,int newi)2801 static void CheckSelfTestLogs(const dev_config & cfg, dev_state & state, int newi)
2802 {
2803 const char * name = cfg.name.c_str();
2804
2805 if (newi<0)
2806 // command failed
2807 MailWarning(cfg, state, 8, "Device: %s, Read SMART Self-Test Log Failed", name);
2808 else {
2809 reset_warning_mail(cfg, state, 8, "Read SMART Self-Test Log worked again");
2810
2811 // old and new error counts
2812 int oldc=state.selflogcount;
2813 int newc=SELFTEST_ERRORCOUNT(newi);
2814
2815 // old and new error timestamps in hours
2816 int oldh=state.selfloghour;
2817 int newh=SELFTEST_ERRORHOURS(newi);
2818
2819 if (oldc<newc) {
2820 // increase in error count
2821 PrintOut(LOG_CRIT, "Device: %s, Self-Test Log error count increased from %d to %d\n",
2822 name, oldc, newc);
2823 MailWarning(cfg, state, 3, "Device: %s, Self-Test Log error count increased from %d to %d",
2824 name, oldc, newc);
2825 state.must_write = true;
2826 }
2827 else if (newc > 0 && oldh != newh) {
2828 // more recent error
2829 // a 'more recent' error might actually be a smaller hour number,
2830 // if the hour number has wrapped.
2831 // There's still a bug here. You might just happen to run a new test
2832 // exactly 32768 hours after the previous failure, and have run exactly
2833 // 20 tests between the two, in which case smartd will miss the
2834 // new failure.
2835 PrintOut(LOG_CRIT, "Device: %s, new Self-Test Log error at hour timestamp %d\n",
2836 name, newh);
2837 MailWarning(cfg, state, 3, "Device: %s, new Self-Test Log error at hour timestamp %d",
2838 name, newh);
2839 state.must_write = true;
2840 }
2841
2842 // Print info if error entries have disappeared
2843 // or newer successful successful extended self-test exits
2844 if (oldc > newc) {
2845 PrintOut(LOG_INFO, "Device: %s, Self-Test Log error count decreased from %d to %d\n",
2846 name, oldc, newc);
2847 if (newc == 0)
2848 reset_warning_mail(cfg, state, 3, "Self-Test Log does no longer report errors");
2849 }
2850
2851 // Needed since self-test error count may DECREASE. Hour might
2852 // also have changed.
2853 state.selflogcount= newc;
2854 state.selfloghour = newh;
2855 }
2856 return;
2857 }
2858
// Characters identifying the schedulable test types, highest priority first.
static const char test_type_chars[] = "LncrSCO";
// Number of test types (the terminating null character is not counted).
static const unsigned num_test_types = (sizeof(test_type_chars) / sizeof(test_type_chars[0])) - 1;
2862
// Check whether a scheduled self-test is due for this device.
// Returns the test type character (one of test_type_chars) if a test matching
// cfg.test_regex is found in the interval [state.scheduled_test_next_check, now],
// 0 if no test is due.
// 'scsi' excludes the test types which are only checked for non-SCSI devices.
// 'usetime', if nonzero, is used instead of the current time
// (PrintTestSchedule() uses this to simulate future checks).
// Side effects: updates state.scheduled_test_next_check and sets
// state.must_write if a test was found.
static char next_scheduled_test(const dev_config & cfg, dev_state & state, bool scsi, time_t usetime = 0)
{
  // check that self-testing has been requested
  if (cfg.test_regex.empty())
    return 0;

  // Exit if drive not capable of any test
  if ( state.not_cap_long && state.not_cap_short &&
      (scsi || (state.not_cap_conveyance && state.not_cap_offline)))
    return 0;

  // since we are about to call localtime(), be sure glibc is informed
  // of any timezone changes we make.
  if (!usetime)
    FixGlibcTimeZoneBug();

  // Is it time for next check?
  time_t now = (!usetime ? time(0) : usetime);
  if (now < state.scheduled_test_next_check) {
    if (state.scheduled_test_next_check <= now + 3600)
      return 0; // Next check within one hour
    // More than one hour, assume system clock time adjusted to the past
    state.scheduled_test_next_check = now;
  }
  else if (state.scheduled_test_next_check + (3600L*24*90) < now) {
    // Limit time check interval to 90 days
    state.scheduled_test_next_check = now - (3600L*24*90);
  }

  // Find ':NNN[-LLL]' in regex for possible offsets and limits
  // At most one offset per test type is collected (plus the implicit offset 0).
  const unsigned max_offsets = 1 + num_test_types;
  unsigned offsets[max_offsets] = {0, }, limits[max_offsets] = {0, };
  unsigned num_offsets = 1; // offsets/limits[0] == 0 always
  for (const char * p = cfg.test_regex.get_pattern(); num_offsets < max_offsets; ) {
    const char * q = strchr(p, ':');
    if (!q)
      break;
    p = q + 1;
    // n1: chars consumed by the offset, n2: set only if '-' follows,
    // n3: chars consumed including the limit. Each stays -1 if not reached.
    unsigned offset = 0, limit = 0; int n1 = -1, n2 = -1, n3 = -1;
    sscanf(p, "%u%n-%n%u%n", &offset, &n1, &n2, &limit, &n3);
    // Accept only a 3 character offset, optionally followed by '-' and
    // a 3 character nonzero limit; otherwise continue search after this ':'.
    if (!(n1 == 3 && (n2 < 0 || (n3 == 3+1+3 && limit > 0))))
      continue;
    offsets[num_offsets] = offset; limits[num_offsets] = limit;
    num_offsets++;
    // Skip the part just parsed
    p += (n3 > 0 ? n3 : n1);
  }

  // Check interval [state.scheduled_test_next_check, now] for scheduled tests
  char testtype = 0;
  time_t testtime = 0; int testhour = 0;
  // Index of the lowest priority test type still considered;
  // lowered each time a test is found.
  int maxtest = num_test_types-1;

  for (time_t t = state.scheduled_test_next_check; ; ) {
    // Check offset 0 and then all offsets for ':NNN' found above
    for (unsigned i = 0; i < num_offsets; i++) {
      unsigned offset = offsets[i], limit = limits[i];
      // Delay in hours; if it exceeds a nonzero limit it is wrapped into [0, limit]
      unsigned delay = cfg.test_offset_factor * offset;
      if (0 < limit && limit < delay)
        delay %= limit + 1;
      // Match against the local time 'delay' hours before t
      struct tm tmbuf, * tms = time_to_tm_local(&tmbuf, t - (delay * 3600));

      // tm_wday is 0 (Sunday) to 6 (Saturday). We use 1 (Monday) to 7 (Sunday).
      int weekday = (tms->tm_wday ? tms->tm_wday : 7);
      for (int j = 0; j <= maxtest; j++) {
        // Skip if drive not capable of this test
        switch (test_type_chars[j]) {
          case 'L': if (state.not_cap_long) continue; break;
          case 'S': if (state.not_cap_short) continue; break;
          case 'C': if (scsi || state.not_cap_conveyance) continue; break;
          case 'O': if (scsi || state.not_cap_offline) continue; break;
          case 'c': case 'n':
          case 'r': if (scsi || state.not_cap_selective) continue; break;
          default: continue;
        }
        // Try match of "T/MM/DD/d/HH[:NNN]"
        char pattern[64];
        snprintf(pattern, sizeof(pattern), "%c/%02d/%02d/%1d/%02d",
          test_type_chars[j], tms->tm_mon+1, tms->tm_mday, weekday, tms->tm_hour);
        if (i > 0) {
          // Append ":NNN" and, if a limit was given, "-LLL" after the fixed length prefix
          const unsigned len = sizeof("S/01/01/1/01") - 1;
          snprintf(pattern + len, sizeof(pattern) - len, ":%03u", offset);
          if (limit > 0)
            snprintf(pattern + len + 4, sizeof(pattern) - len - 4, "-%03u", limit);
        }
        if (cfg.test_regex.full_match(pattern)) {
          // Test found
          testtype = pattern[0];
          testtime = t; testhour = tms->tm_hour;
          // Limit further matches to higher priority self-tests
          maxtest = j-1;
          break;
        }
      }
    }

    // Exit if no tests left or current time reached
    if (maxtest < 0)
      break;
    if (t >= now)
      break;
    // Check next hour
    if ((t += 3600) > now)
      t = now;
  }

  // Do next check not before next hour.
  struct tm tmbuf, * tmnow = time_to_tm_local(&tmbuf, now);
  state.scheduled_test_next_check = now + (3600 - tmnow->tm_min*60 - tmnow->tm_sec);

  if (testtype) {
    state.must_write = true;
    // Tell user if an old test was found.
    // (not done if 'usetime' is set or the match belongs to the current hour)
    if (!usetime && !(testhour == tmnow->tm_hour && testtime + 3600 > now)) {
      char datebuf[DATEANDEPOCHLEN]; dateandtimezoneepoch(datebuf, testtime);
      PrintOut(LOG_INFO, "Device: %s, old test of type %c not run at %s, starting now.\n",
        cfg.name.c_str(), testtype, datebuf);
    }
  }

  return testtype;
}
2986
2987 // Print a list of future tests.
PrintTestSchedule(const dev_config_vector & configs,dev_state_vector & states,const smart_device_list & devices)2988 static void PrintTestSchedule(const dev_config_vector & configs, dev_state_vector & states, const smart_device_list & devices)
2989 {
2990 unsigned numdev = configs.size();
2991 if (!numdev)
2992 return;
2993 std::vector<int> testcnts(numdev * num_test_types, 0);
2994
2995 PrintOut(LOG_INFO, "\nNext scheduled self tests (at most 5 of each type per device):\n");
2996
2997 // FixGlibcTimeZoneBug(); // done in PrintOut()
2998 time_t now = time(0);
2999 char datenow[DATEANDEPOCHLEN], date[DATEANDEPOCHLEN];
3000 dateandtimezoneepoch(datenow, now);
3001
3002 long seconds;
3003 for (seconds=checktime; seconds<3600L*24*90; seconds+=checktime) {
3004 // Check for each device whether a test will be run
3005 time_t testtime = now + seconds;
3006 for (unsigned i = 0; i < numdev; i++) {
3007 const dev_config & cfg = configs.at(i);
3008 dev_state & state = states.at(i);
3009 const char * p;
3010 char testtype = next_scheduled_test(cfg, state, devices.at(i)->is_scsi(), testtime);
3011 if (testtype && (p = strchr(test_type_chars, testtype))) {
3012 unsigned t = (p - test_type_chars);
3013 // Report at most 5 tests of each type
3014 if (++testcnts[i*num_test_types + t] <= 5) {
3015 dateandtimezoneepoch(date, testtime);
3016 PrintOut(LOG_INFO, "Device: %s, will do test %d of type %c at %s\n", cfg.name.c_str(),
3017 testcnts[i*num_test_types + t], testtype, date);
3018 }
3019 }
3020 }
3021 }
3022
3023 // Report totals
3024 dateandtimezoneepoch(date, now+seconds);
3025 PrintOut(LOG_INFO, "\nTotals [%s - %s]:\n", datenow, date);
3026 for (unsigned i = 0; i < numdev; i++) {
3027 const dev_config & cfg = configs.at(i);
3028 bool scsi = devices.at(i)->is_scsi();
3029 for (unsigned t = 0; t < num_test_types; t++) {
3030 int cnt = testcnts[i*num_test_types + t];
3031 if (cnt == 0 && !strchr((scsi ? "LS" : "LSCO"), test_type_chars[t]))
3032 continue;
3033 PrintOut(LOG_INFO, "Device: %s, will do %3d test%s of type %c\n", cfg.name.c_str(),
3034 cnt, (cnt==1?"":"s"), test_type_chars[t]);
3035 }
3036 }
3037
3038 }
3039
3040 // Return zero on success, nonzero on failure. Perform offline (background)
3041 // short or long (extended) self test on given scsi device.
DoSCSISelfTest(const dev_config & cfg,dev_state & state,scsi_device * device,char testtype)3042 static int DoSCSISelfTest(const dev_config & cfg, dev_state & state, scsi_device * device, char testtype)
3043 {
3044 int retval = 0;
3045 const char *testname = 0;
3046 const char *name = cfg.name.c_str();
3047 int inProgress;
3048
3049 if (scsiSelfTestInProgress(device, &inProgress)) {
3050 PrintOut(LOG_CRIT, "Device: %s, does not support Self-Tests\n", name);
3051 state.not_cap_short = state.not_cap_long = true;
3052 return 1;
3053 }
3054
3055 if (1 == inProgress) {
3056 PrintOut(LOG_INFO, "Device: %s, skip since Self-Test already in "
3057 "progress.\n", name);
3058 return 1;
3059 }
3060
3061 switch (testtype) {
3062 case 'S':
3063 testname = "Short Self";
3064 retval = scsiSmartShortSelfTest(device);
3065 break;
3066 case 'L':
3067 testname = "Long Self";
3068 retval = scsiSmartExtendSelfTest(device);
3069 break;
3070 }
3071 // If we can't do the test, exit
3072 if (NULL == testname) {
3073 PrintOut(LOG_CRIT, "Device: %s, not capable of %c Self-Test\n", name,
3074 testtype);
3075 return 1;
3076 }
3077 if (retval) {
3078 if ((SIMPLE_ERR_BAD_OPCODE == retval) ||
3079 (SIMPLE_ERR_BAD_FIELD == retval)) {
3080 PrintOut(LOG_CRIT, "Device: %s, not capable of %s-Test\n", name,
3081 testname);
3082 if ('L'==testtype)
3083 state.not_cap_long = true;
3084 else
3085 state.not_cap_short = true;
3086
3087 return 1;
3088 }
3089 PrintOut(LOG_CRIT, "Device: %s, execute %s-Test failed (err: %d)\n", name,
3090 testname, retval);
3091 return 1;
3092 }
3093
3094 PrintOut(LOG_INFO, "Device: %s, starting scheduled %s-Test.\n", name, testname);
3095
3096 return 0;
3097 }
3098
3099 // Do an offline immediate or self-test. Return zero on success,
3100 // nonzero on failure.
DoATASelfTest(const dev_config & cfg,dev_state & state,ata_device * device,char testtype)3101 static int DoATASelfTest(const dev_config & cfg, dev_state & state, ata_device * device, char testtype)
3102 {
3103 const char *name = cfg.name.c_str();
3104
3105 // Read current smart data and check status/capability
3106 struct ata_smart_values data;
3107 if (ataReadSmartValues(device, &data) || !(data.offline_data_collection_capability)) {
3108 PrintOut(LOG_CRIT, "Device: %s, not capable of Offline or Self-Testing.\n", name);
3109 return 1;
3110 }
3111
3112 // Check for capability to do the test
3113 int dotest = -1, mode = 0;
3114 const char *testname = 0;
3115 switch (testtype) {
3116 case 'O':
3117 testname="Offline Immediate ";
3118 if (isSupportExecuteOfflineImmediate(&data))
3119 dotest=OFFLINE_FULL_SCAN;
3120 else
3121 state.not_cap_offline = true;
3122 break;
3123 case 'C':
3124 testname="Conveyance Self-";
3125 if (isSupportConveyanceSelfTest(&data))
3126 dotest=CONVEYANCE_SELF_TEST;
3127 else
3128 state.not_cap_conveyance = true;
3129 break;
3130 case 'S':
3131 testname="Short Self-";
3132 if (isSupportSelfTest(&data))
3133 dotest=SHORT_SELF_TEST;
3134 else
3135 state.not_cap_short = true;
3136 break;
3137 case 'L':
3138 testname="Long Self-";
3139 if (isSupportSelfTest(&data))
3140 dotest=EXTEND_SELF_TEST;
3141 else
3142 state.not_cap_long = true;
3143 break;
3144
3145 case 'c': case 'n': case 'r':
3146 testname = "Selective Self-";
3147 if (isSupportSelectiveSelfTest(&data)) {
3148 dotest = SELECTIVE_SELF_TEST;
3149 switch (testtype) {
3150 case 'c': mode = SEL_CONT; break;
3151 case 'n': mode = SEL_NEXT; break;
3152 case 'r': mode = SEL_REDO; break;
3153 }
3154 }
3155 else
3156 state.not_cap_selective = true;
3157 break;
3158 }
3159
3160 // If we can't do the test, exit
3161 if (dotest<0) {
3162 PrintOut(LOG_CRIT, "Device: %s, not capable of %sTest\n", name, testname);
3163 return 1;
3164 }
3165
3166 // If currently running a self-test, do not interrupt it to start another.
3167 if (15==(data.self_test_exec_status >> 4)) {
3168 if (cfg.firmwarebugs.is_set(BUG_SAMSUNG3) && data.self_test_exec_status == 0xf0) {
3169 PrintOut(LOG_INFO, "Device: %s, will not skip scheduled %sTest "
3170 "despite unclear Self-Test byte (SAMSUNG Firmware bug).\n", name, testname);
3171 } else {
3172 PrintOut(LOG_INFO, "Device: %s, skip scheduled %sTest; %1d0%% remaining of current Self-Test.\n",
3173 name, testname, (int)(data.self_test_exec_status & 0x0f));
3174 return 1;
3175 }
3176 }
3177
3178 if (dotest == SELECTIVE_SELF_TEST) {
3179 // Set test span
3180 ata_selective_selftest_args selargs, prev_args;
3181 selargs.num_spans = 1;
3182 selargs.span[0].mode = mode;
3183 prev_args.num_spans = 1;
3184 prev_args.span[0].start = state.selective_test_last_start;
3185 prev_args.span[0].end = state.selective_test_last_end;
3186 if (ataWriteSelectiveSelfTestLog(device, selargs, &data, state.num_sectors, &prev_args)) {
3187 PrintOut(LOG_CRIT, "Device: %s, prepare %sTest failed\n", name, testname);
3188 return 1;
3189 }
3190 uint64_t start = selargs.span[0].start, end = selargs.span[0].end;
3191 PrintOut(LOG_INFO, "Device: %s, %s test span at LBA %" PRIu64 " - %" PRIu64 " (%" PRIu64 " sectors, %u%% - %u%% of disk).\n",
3192 name, (selargs.span[0].mode == SEL_NEXT ? "next" : "redo"),
3193 start, end, end - start + 1,
3194 (unsigned)((100 * start + state.num_sectors/2) / state.num_sectors),
3195 (unsigned)((100 * end + state.num_sectors/2) / state.num_sectors));
3196 state.selective_test_last_start = start;
3197 state.selective_test_last_end = end;
3198 }
3199
3200 // execute the test, and return status
3201 int retval = smartcommandhandler(device, IMMEDIATE_OFFLINE, dotest, NULL);
3202 if (retval) {
3203 PrintOut(LOG_CRIT, "Device: %s, execute %sTest failed.\n", name, testname);
3204 return retval;
3205 }
3206
3207 // Report recent test start to do_disable_standby_check()
3208 // and force log of next test status
3209 if (testtype == 'O')
3210 state.offline_started = true;
3211 else
3212 state.selftest_started = true;
3213
3214 PrintOut(LOG_INFO, "Device: %s, starting scheduled %sTest.\n", name, testname);
3215 return 0;
3216 }
3217
3218 // Check pending sector count attribute values (-C, -U directives).
check_pending(const dev_config & cfg,dev_state & state,unsigned char id,bool increase_only,const ata_smart_values & smartval,int mailtype,const char * msg)3219 static void check_pending(const dev_config & cfg, dev_state & state,
3220 unsigned char id, bool increase_only,
3221 const ata_smart_values & smartval,
3222 int mailtype, const char * msg)
3223 {
3224 // Find attribute index
3225 int i = ata_find_attr_index(id, smartval);
3226 if (!(i >= 0 && ata_find_attr_index(id, state.smartval) == i))
3227 return;
3228
3229 // No report if no sectors pending.
3230 uint64_t rawval = ata_get_attr_raw_value(smartval.vendor_attributes[i], cfg.attribute_defs);
3231 if (rawval == 0) {
3232 reset_warning_mail(cfg, state, mailtype, "No more %s", msg);
3233 return;
3234 }
3235
3236 // If attribute is not reset, report only sector count increases.
3237 uint64_t prev_rawval = ata_get_attr_raw_value(state.smartval.vendor_attributes[i], cfg.attribute_defs);
3238 if (!(!increase_only || prev_rawval < rawval))
3239 return;
3240
3241 // Format message.
3242 std::string s = strprintf("Device: %s, %" PRId64 " %s", cfg.name.c_str(), rawval, msg);
3243 if (prev_rawval > 0 && rawval != prev_rawval)
3244 s += strprintf(" (changed %+" PRId64 ")", rawval - prev_rawval);
3245
3246 PrintOut(LOG_CRIT, "%s\n", s.c_str());
3247 MailWarning(cfg, state, mailtype, "%s", s.c_str());
3248 state.must_write = true;
3249 }
3250
// Format a temperature value for output.
// Returns "??" if x is 0 (value not set), otherwise 'buf' filled with
// the decimal representation of x.
static const char * fmt_temp(unsigned char x, char (& buf)[20])
{
  if (x != 0) {
    snprintf(buf, sizeof(buf), "%u", x);
    return buf;
  }
  return "??";
}
3259
3260 // Check Temperature limits
CheckTemperature(const dev_config & cfg,dev_state & state,unsigned char currtemp,unsigned char triptemp)3261 static void CheckTemperature(const dev_config & cfg, dev_state & state, unsigned char currtemp, unsigned char triptemp)
3262 {
3263 if (!(0 < currtemp && currtemp < 255)) {
3264 PrintOut(LOG_INFO, "Device: %s, failed to read Temperature\n", cfg.name.c_str());
3265 return;
3266 }
3267
3268 // Update Max Temperature
3269 const char * minchg = "", * maxchg = "";
3270 if (currtemp > state.tempmax) {
3271 if (state.tempmax)
3272 maxchg = "!";
3273 state.tempmax = currtemp;
3274 state.must_write = true;
3275 }
3276
3277 char buf[20];
3278 if (!state.temperature) {
3279 // First check
3280 if (!state.tempmin || currtemp < state.tempmin)
3281 // Delay Min Temperature update by ~ 30 minutes.
3282 state.tempmin_delay = time(0) + CHECKTIME - 60;
3283 PrintOut(LOG_INFO, "Device: %s, initial Temperature is %d Celsius (Min/Max %s/%u%s)\n",
3284 cfg.name.c_str(), (int)currtemp, fmt_temp(state.tempmin, buf), state.tempmax, maxchg);
3285 if (triptemp)
3286 PrintOut(LOG_INFO, " [trip Temperature is %d Celsius]\n", (int)triptemp);
3287 state.temperature = currtemp;
3288 }
3289 else {
3290 if (state.tempmin_delay) {
3291 // End Min Temperature update delay if ...
3292 if ( (state.tempmin && currtemp > state.tempmin) // current temp exceeds recorded min,
3293 || (state.tempmin_delay <= time(0))) { // or delay time is over.
3294 state.tempmin_delay = 0;
3295 if (!state.tempmin)
3296 state.tempmin = 255;
3297 }
3298 }
3299
3300 // Update Min Temperature
3301 if (!state.tempmin_delay && currtemp < state.tempmin) {
3302 state.tempmin = currtemp;
3303 state.must_write = true;
3304 if (currtemp != state.temperature)
3305 minchg = "!";
3306 }
3307
3308 // Track changes
3309 if (cfg.tempdiff && (*minchg || *maxchg || abs((int)currtemp - (int)state.temperature) >= cfg.tempdiff)) {
3310 PrintOut(LOG_INFO, "Device: %s, Temperature changed %+d Celsius to %u Celsius (Min/Max %s%s/%u%s)\n",
3311 cfg.name.c_str(), (int)currtemp-(int)state.temperature, currtemp, fmt_temp(state.tempmin, buf), minchg, state.tempmax, maxchg);
3312 state.temperature = currtemp;
3313 }
3314 }
3315
3316 // Check limits
3317 if (cfg.tempcrit && currtemp >= cfg.tempcrit) {
3318 PrintOut(LOG_CRIT, "Device: %s, Temperature %u Celsius reached critical limit of %u Celsius (Min/Max %s%s/%u%s)\n",
3319 cfg.name.c_str(), currtemp, cfg.tempcrit, fmt_temp(state.tempmin, buf), minchg, state.tempmax, maxchg);
3320 MailWarning(cfg, state, 12, "Device: %s, Temperature %d Celsius reached critical limit of %u Celsius (Min/Max %s%s/%u%s)",
3321 cfg.name.c_str(), currtemp, cfg.tempcrit, fmt_temp(state.tempmin, buf), minchg, state.tempmax, maxchg);
3322 }
3323 else if (cfg.tempinfo && currtemp >= cfg.tempinfo) {
3324 PrintOut(LOG_INFO, "Device: %s, Temperature %u Celsius reached limit of %u Celsius (Min/Max %s%s/%u%s)\n",
3325 cfg.name.c_str(), currtemp, cfg.tempinfo, fmt_temp(state.tempmin, buf), minchg, state.tempmax, maxchg);
3326 }
3327 else if (cfg.tempcrit) {
3328 unsigned char limit = (cfg.tempinfo ? cfg.tempinfo : cfg.tempcrit-5);
3329 if (currtemp < limit)
3330 reset_warning_mail(cfg, state, 12, "Temperature %u Celsius dropped below %u Celsius", currtemp, limit);
3331 }
3332 }
3333
3334 // Check normalized and raw attribute values.
check_attribute(const dev_config & cfg,dev_state & state,const ata_smart_attribute & attr,const ata_smart_attribute & prev,int attridx,const ata_smart_threshold_entry * thresholds)3335 static void check_attribute(const dev_config & cfg, dev_state & state,
3336 const ata_smart_attribute & attr,
3337 const ata_smart_attribute & prev,
3338 int attridx,
3339 const ata_smart_threshold_entry * thresholds)
3340 {
3341 // Check attribute and threshold
3342 ata_attr_state attrstate = ata_get_attr_state(attr, attridx, thresholds, cfg.attribute_defs);
3343 if (attrstate == ATTRSTATE_NON_EXISTING)
3344 return;
3345
3346 // If requested, check for usage attributes that have failed.
3347 if ( cfg.usagefailed && attrstate == ATTRSTATE_FAILED_NOW
3348 && !cfg.monitor_attr_flags.is_set(attr.id, MONITOR_IGN_FAILUSE)) {
3349 std::string attrname = ata_get_smart_attr_name(attr.id, cfg.attribute_defs, cfg.dev_rpm);
3350 PrintOut(LOG_CRIT, "Device: %s, Failed SMART usage Attribute: %d %s.\n", cfg.name.c_str(), attr.id, attrname.c_str());
3351 MailWarning(cfg, state, 2, "Device: %s, Failed SMART usage Attribute: %d %s.", cfg.name.c_str(), attr.id, attrname.c_str());
3352 state.must_write = true;
3353 }
3354
3355 // Return if we're not tracking this type of attribute
3356 bool prefail = !!ATTRIBUTE_FLAGS_PREFAILURE(attr.flags);
3357 if (!( ( prefail && cfg.prefail)
3358 || (!prefail && cfg.usage )))
3359 return;
3360
3361 // Return if '-I ID' was specified
3362 if (cfg.monitor_attr_flags.is_set(attr.id, MONITOR_IGNORE))
3363 return;
3364
3365 // Issue warning if they don't have the same ID in all structures.
3366 if (attr.id != prev.id) {
3367 PrintOut(LOG_INFO,"Device: %s, same Attribute has different ID numbers: %d = %d\n",
3368 cfg.name.c_str(), attr.id, prev.id);
3369 return;
3370 }
3371
3372 // Compare normalized values if valid.
3373 bool valchanged = false;
3374 if (attrstate > ATTRSTATE_NO_NORMVAL) {
3375 if (attr.current != prev.current)
3376 valchanged = true;
3377 }
3378
3379 // Compare raw values if requested.
3380 bool rawchanged = false;
3381 if (cfg.monitor_attr_flags.is_set(attr.id, MONITOR_RAW)) {
3382 if ( ata_get_attr_raw_value(attr, cfg.attribute_defs)
3383 != ata_get_attr_raw_value(prev, cfg.attribute_defs))
3384 rawchanged = true;
3385 }
3386
3387 // Return if no change
3388 if (!(valchanged || rawchanged))
3389 return;
3390
3391 // Format value strings
3392 std::string currstr, prevstr;
3393 if (attrstate == ATTRSTATE_NO_NORMVAL) {
3394 // Print raw values only
3395 currstr = strprintf("%s (Raw)",
3396 ata_format_attr_raw_value(attr, cfg.attribute_defs).c_str());
3397 prevstr = strprintf("%s (Raw)",
3398 ata_format_attr_raw_value(prev, cfg.attribute_defs).c_str());
3399 }
3400 else if (cfg.monitor_attr_flags.is_set(attr.id, MONITOR_RAW_PRINT)) {
3401 // Print normalized and raw values
3402 currstr = strprintf("%d [Raw %s]", attr.current,
3403 ata_format_attr_raw_value(attr, cfg.attribute_defs).c_str());
3404 prevstr = strprintf("%d [Raw %s]", prev.current,
3405 ata_format_attr_raw_value(prev, cfg.attribute_defs).c_str());
3406 }
3407 else {
3408 // Print normalized values only
3409 currstr = strprintf("%d", attr.current);
3410 prevstr = strprintf("%d", prev.current);
3411 }
3412
3413 // Format message
3414 std::string msg = strprintf("Device: %s, SMART %s Attribute: %d %s changed from %s to %s",
3415 cfg.name.c_str(), (prefail ? "Prefailure" : "Usage"), attr.id,
3416 ata_get_smart_attr_name(attr.id, cfg.attribute_defs, cfg.dev_rpm).c_str(),
3417 prevstr.c_str(), currstr.c_str());
3418
3419 // Report this change as critical ?
3420 if ( (valchanged && cfg.monitor_attr_flags.is_set(attr.id, MONITOR_AS_CRIT))
3421 || (rawchanged && cfg.monitor_attr_flags.is_set(attr.id, MONITOR_RAW_AS_CRIT))) {
3422 PrintOut(LOG_CRIT, "%s\n", msg.c_str());
3423 MailWarning(cfg, state, 2, "%s", msg.c_str());
3424 }
3425 else {
3426 PrintOut(LOG_INFO, "%s\n", msg.c_str());
3427 }
3428 state.must_write = true;
3429 }
3430
3431
ATACheckDevice(const dev_config & cfg,dev_state & state,ata_device * atadev,bool firstpass,bool allow_selftests)3432 static int ATACheckDevice(const dev_config & cfg, dev_state & state, ata_device * atadev,
3433 bool firstpass, bool allow_selftests)
3434 {
3435 if (!open_device(cfg, state, atadev, "ATA"))
3436 return 1;
3437
3438 const char * name = cfg.name.c_str();
3439
3440 // user may have requested (with the -n Directive) to leave the disk
3441 // alone if it is in idle or sleeping mode. In this case check the
3442 // power mode and exit without check if needed
3443 if (cfg.powermode && !state.powermodefail) {
3444 int dontcheck=0, powermode=ataCheckPowerMode(atadev);
3445 const char * mode = 0;
3446 if (0 <= powermode && powermode < 0xff) {
3447 // wait for possible spin up and check again
3448 int powermode2;
3449 sleep(5);
3450 powermode2 = ataCheckPowerMode(atadev);
3451 if (powermode2 > powermode)
3452 PrintOut(LOG_INFO, "Device: %s, CHECK POWER STATUS spins up disk (0x%02x -> 0x%02x)\n", name, powermode, powermode2);
3453 powermode = powermode2;
3454 }
3455
3456 switch (powermode){
3457 case -1:
3458 // SLEEP
3459 mode="SLEEP";
3460 if (cfg.powermode>=1)
3461 dontcheck=1;
3462 break;
3463 case 0x00:
3464 // STANDBY
3465 mode="STANDBY";
3466 if (cfg.powermode>=2)
3467 dontcheck=1;
3468 break;
3469 case 0x01:
3470 // STANDBY_Y
3471 mode="STANDBY_Y";
3472 if (cfg.powermode>=2)
3473 dontcheck=1;
3474 break;
3475 case 0x80:
3476 // IDLE
3477 mode="IDLE";
3478 if (cfg.powermode>=3)
3479 dontcheck=1;
3480 break;
3481 case 0x81:
3482 // IDLE_A
3483 mode="IDLE_A";
3484 if (cfg.powermode>=3)
3485 dontcheck=1;
3486 break;
3487 case 0x82:
3488 // IDLE_B
3489 mode="IDLE_B";
3490 if (cfg.powermode>=3)
3491 dontcheck=1;
3492 break;
3493 case 0x83:
3494 // IDLE_C
3495 mode="IDLE_C";
3496 if (cfg.powermode>=3)
3497 dontcheck=1;
3498 break;
3499 case 0xff:
3500 // ACTIVE/IDLE
3501 case 0x40:
3502 // ACTIVE
3503 case 0x41:
3504 // ACTIVE
3505 mode="ACTIVE or IDLE";
3506 break;
3507 default:
3508 // UNKNOWN
3509 PrintOut(LOG_CRIT, "Device: %s, CHECK POWER STATUS returned %d, not ATA compliant, ignoring -n Directive\n",
3510 name, powermode);
3511 state.powermodefail = true;
3512 break;
3513 }
3514
3515 // if we are going to skip a check, return now
3516 if (dontcheck){
3517 // skip at most powerskipmax checks
3518 if (!cfg.powerskipmax || state.powerskipcnt<cfg.powerskipmax) {
3519 CloseDevice(atadev, name);
3520 // report first only except if state has changed, avoid waking up system disk
3521 if ((!state.powerskipcnt || state.lastpowermodeskipped != powermode) && !cfg.powerquiet) {
3522 PrintOut(LOG_INFO, "Device: %s, is in %s mode, suspending checks\n", name, mode);
3523 state.lastpowermodeskipped = powermode;
3524 }
3525 state.powerskipcnt++;
3526 return 0;
3527 }
3528 else {
3529 PrintOut(LOG_INFO, "Device: %s, %s mode ignored due to reached limit of skipped checks (%d check%s skipped)\n",
3530 name, mode, state.powerskipcnt, (state.powerskipcnt==1?"":"s"));
3531 }
3532 state.powerskipcnt = 0;
3533 state.tempmin_delay = time(0) + CHECKTIME - 60; // Delay Min Temperature update
3534 }
3535 else if (state.powerskipcnt) {
3536 PrintOut(LOG_INFO, "Device: %s, is back in %s mode, resuming checks (%d check%s skipped)\n",
3537 name, mode, state.powerskipcnt, (state.powerskipcnt==1?"":"s"));
3538 state.powerskipcnt = 0;
3539 state.tempmin_delay = time(0) + CHECKTIME - 60; // Delay Min Temperature update
3540 }
3541 }
3542
3543 // check smart status
3544 if (cfg.smartcheck) {
3545 int status=ataSmartStatus2(atadev);
3546 if (status==-1){
3547 PrintOut(LOG_INFO,"Device: %s, not capable of SMART self-check\n",name);
3548 MailWarning(cfg, state, 5, "Device: %s, not capable of SMART self-check", name);
3549 state.must_write = true;
3550 }
3551 else if (status==1){
3552 PrintOut(LOG_CRIT, "Device: %s, FAILED SMART self-check. BACK UP DATA NOW!\n", name);
3553 MailWarning(cfg, state, 1, "Device: %s, FAILED SMART self-check. BACK UP DATA NOW!", name);
3554 state.must_write = true;
3555 }
3556 }
3557
3558 // Check everything that depends upon SMART Data (eg, Attribute values)
3559 if ( cfg.usagefailed || cfg.prefail || cfg.usage
3560 || cfg.curr_pending_id || cfg.offl_pending_id
3561 || cfg.tempdiff || cfg.tempinfo || cfg.tempcrit
3562 || cfg.selftest || cfg.offlinests || cfg.selfteststs) {
3563
3564 // Read current attribute values.
3565 ata_smart_values curval;
3566 if (ataReadSmartValues(atadev, &curval)){
3567 PrintOut(LOG_CRIT, "Device: %s, failed to read SMART Attribute Data\n", name);
3568 MailWarning(cfg, state, 6, "Device: %s, failed to read SMART Attribute Data", name);
3569 state.must_write = true;
3570 }
3571 else {
3572 reset_warning_mail(cfg, state, 6, "read SMART Attribute Data worked again");
3573
3574 // look for current or offline pending sectors
3575 if (cfg.curr_pending_id)
3576 check_pending(cfg, state, cfg.curr_pending_id, cfg.curr_pending_incr, curval, 10,
3577 (!cfg.curr_pending_incr ? "Currently unreadable (pending) sectors"
3578 : "Total unreadable (pending) sectors" ));
3579
3580 if (cfg.offl_pending_id)
3581 check_pending(cfg, state, cfg.offl_pending_id, cfg.offl_pending_incr, curval, 11,
3582 (!cfg.offl_pending_incr ? "Offline uncorrectable sectors"
3583 : "Total offline uncorrectable sectors"));
3584
3585 // check temperature limits
3586 if (cfg.tempdiff || cfg.tempinfo || cfg.tempcrit)
3587 CheckTemperature(cfg, state, ata_return_temperature_value(&curval, cfg.attribute_defs), 0);
3588
3589 // look for failed usage attributes, or track usage or prefail attributes
3590 if (cfg.usagefailed || cfg.prefail || cfg.usage) {
3591 for (int i = 0; i < NUMBER_ATA_SMART_ATTRIBUTES; i++) {
3592 check_attribute(cfg, state,
3593 curval.vendor_attributes[i],
3594 state.smartval.vendor_attributes[i],
3595 i, state.smartthres.thres_entries);
3596 }
3597 }
3598
3599 // Log changes of offline data collection status
3600 if (cfg.offlinests) {
3601 if ( curval.offline_data_collection_status
3602 != state.smartval.offline_data_collection_status
3603 || state.offline_started // test was started in previous call
3604 || (firstpass && (debugmode || (curval.offline_data_collection_status & 0x7d))))
3605 log_offline_data_coll_status(name, curval.offline_data_collection_status);
3606 }
3607
3608 // Log changes of self-test execution status
3609 if (cfg.selfteststs) {
3610 if ( curval.self_test_exec_status != state.smartval.self_test_exec_status
3611 || state.selftest_started // test was started in previous call
3612 || (firstpass && (debugmode || (curval.self_test_exec_status & 0xf0))))
3613 log_self_test_exec_status(name, curval.self_test_exec_status);
3614 }
3615
3616 // Save the new values for the next time around
3617 state.smartval = curval;
3618 }
3619 }
3620 state.offline_started = state.selftest_started = false;
3621
3622 // check if number of selftest errors has increased (note: may also DECREASE)
3623 if (cfg.selftest)
3624 CheckSelfTestLogs(cfg, state, SelfTestErrorCount(atadev, name, cfg.firmwarebugs));
3625
3626 // check if number of ATA errors has increased
3627 if (cfg.errorlog || cfg.xerrorlog) {
3628
3629 int errcnt1 = -1, errcnt2 = -1;
3630 if (cfg.errorlog)
3631 errcnt1 = read_ata_error_count(atadev, name, cfg.firmwarebugs, false);
3632 if (cfg.xerrorlog)
3633 errcnt2 = read_ata_error_count(atadev, name, cfg.firmwarebugs, true);
3634
3635 // new number of errors is max of both logs
3636 int newc = (errcnt1 >= errcnt2 ? errcnt1 : errcnt2);
3637
3638 // did command fail?
3639 if (newc<0)
3640 // lack of PrintOut here is INTENTIONAL
3641 MailWarning(cfg, state, 7, "Device: %s, Read SMART Error Log Failed", name);
3642
3643 // has error count increased?
3644 int oldc = state.ataerrorcount;
3645 if (newc>oldc){
3646 PrintOut(LOG_CRIT, "Device: %s, ATA error count increased from %d to %d\n",
3647 name, oldc, newc);
3648 MailWarning(cfg, state, 4, "Device: %s, ATA error count increased from %d to %d",
3649 name, oldc, newc);
3650 state.must_write = true;
3651 }
3652
3653 if (newc>=0)
3654 state.ataerrorcount=newc;
3655 }
3656
3657 // if the user has asked, and device is capable (or we're not yet
3658 // sure) check whether a self test should be done now.
3659 if (allow_selftests && !cfg.test_regex.empty()) {
3660 char testtype = next_scheduled_test(cfg, state, false/*!scsi*/);
3661 if (testtype)
3662 DoATASelfTest(cfg, state, atadev, testtype);
3663 }
3664
3665 // Don't leave device open -- the OS/user may want to access it
3666 // before the next smartd cycle!
3667 CloseDevice(atadev, name);
3668
3669 // Copy ATA attribute values to persistent state
3670 state.update_persistent_state();
3671
3672 state.attrlog_dirty = true;
3673 return 0;
3674 }
3675
SCSICheckDevice(const dev_config & cfg,dev_state & state,scsi_device * scsidev,bool allow_selftests)3676 static int SCSICheckDevice(const dev_config & cfg, dev_state & state, scsi_device * scsidev, bool allow_selftests)
3677 {
3678 if (!open_device(cfg, state, scsidev, "SCSI"))
3679 return 1;
3680
3681 const char * name = cfg.name.c_str();
3682
3683 uint8_t asc = 0, ascq = 0;
3684 uint8_t currenttemp = 0, triptemp = 0;
3685 if (!state.SuppressReport) {
3686 if (scsiCheckIE(scsidev, state.SmartPageSupported, state.TempPageSupported,
3687 &asc, &ascq, ¤ttemp, &triptemp)) {
3688 PrintOut(LOG_INFO, "Device: %s, failed to read SMART values\n",
3689 name);
3690 MailWarning(cfg, state, 6, "Device: %s, failed to read SMART values", name);
3691 state.SuppressReport = 1;
3692 }
3693 }
3694 if (asc > 0) {
3695 const char * cp = scsiGetIEString(asc, ascq);
3696 if (cp) {
3697 PrintOut(LOG_CRIT, "Device: %s, SMART Failure: %s\n", name, cp);
3698 MailWarning(cfg, state, 1,"Device: %s, SMART Failure: %s", name, cp);
3699 } else if (asc == 4 && ascq == 9) {
3700 PrintOut(LOG_INFO,"Device: %s, self-test in progress\n", name);
3701 } else if (debugmode)
3702 PrintOut(LOG_INFO,"Device: %s, non-SMART asc,ascq: %d,%d\n",
3703 name, (int)asc, (int)ascq);
3704 } else if (debugmode)
3705 PrintOut(LOG_INFO,"Device: %s, SMART health: passed\n", name);
3706
3707 // check temperature limits
3708 if (cfg.tempdiff || cfg.tempinfo || cfg.tempcrit)
3709 CheckTemperature(cfg, state, currenttemp, triptemp);
3710
3711 // check if number of selftest errors has increased (note: may also DECREASE)
3712 if (cfg.selftest)
3713 CheckSelfTestLogs(cfg, state, scsiCountFailedSelfTests(scsidev, 0));
3714
3715 if (allow_selftests && !cfg.test_regex.empty()) {
3716 char testtype = next_scheduled_test(cfg, state, true/*scsi*/);
3717 if (testtype)
3718 DoSCSISelfTest(cfg, state, scsidev, testtype);
3719 }
3720 if (!cfg.attrlog_file.empty()){
3721 // saving error counters to state
3722 uint8_t tBuf[252];
3723 if (state.ReadECounterPageSupported && (0 == scsiLogSense(scsidev,
3724 READ_ERROR_COUNTER_LPAGE, 0, tBuf, sizeof(tBuf), 0))) {
3725 scsiDecodeErrCounterPage(tBuf, &state.scsi_error_counters[0].errCounter);
3726 state.scsi_error_counters[0].found=1;
3727 }
3728 if (state.WriteECounterPageSupported && (0 == scsiLogSense(scsidev,
3729 WRITE_ERROR_COUNTER_LPAGE, 0, tBuf, sizeof(tBuf), 0))) {
3730 scsiDecodeErrCounterPage(tBuf, &state.scsi_error_counters[1].errCounter);
3731 state.scsi_error_counters[1].found=1;
3732 }
3733 if (state.VerifyECounterPageSupported && (0 == scsiLogSense(scsidev,
3734 VERIFY_ERROR_COUNTER_LPAGE, 0, tBuf, sizeof(tBuf), 0))) {
3735 scsiDecodeErrCounterPage(tBuf, &state.scsi_error_counters[2].errCounter);
3736 state.scsi_error_counters[2].found=1;
3737 }
3738 if (state.NonMediumErrorPageSupported && (0 == scsiLogSense(scsidev,
3739 NON_MEDIUM_ERROR_LPAGE, 0, tBuf, sizeof(tBuf), 0))) {
3740 scsiDecodeNonMediumErrPage(tBuf, &state.scsi_nonmedium_error.nme);
3741 state.scsi_nonmedium_error.found=1;
3742 }
3743 // store temperature if not done by CheckTemperature() above
3744 if (!(cfg.tempdiff || cfg.tempinfo || cfg.tempcrit))
3745 state.temperature = currenttemp;
3746 }
3747 CloseDevice(scsidev, name);
3748 state.attrlog_dirty = true;
3749 return 0;
3750 }
3751
NVMeCheckDevice(const dev_config & cfg,dev_state & state,nvme_device * nvmedev)3752 static int NVMeCheckDevice(const dev_config & cfg, dev_state & state, nvme_device * nvmedev)
3753 {
3754 if (!open_device(cfg, state, nvmedev, "NVMe"))
3755 return 1;
3756
3757 const char * name = cfg.name.c_str();
3758
3759 // Read SMART/Health log
3760 nvme_smart_log smart_log;
3761 if (!nvme_read_smart_log(nvmedev, smart_log)) {
3762 PrintOut(LOG_INFO, "Device: %s, failed to read NVMe SMART/Health Information\n", name);
3763 MailWarning(cfg, state, 6, "Device: %s, failed to read NVMe SMART/Health Information", name);
3764 state.must_write = true;
3765 return 0;
3766 }
3767
3768 // Check Critical Warning bits
3769 if (cfg.smartcheck && smart_log.critical_warning) {
3770 unsigned char w = smart_log.critical_warning;
3771 std::string msg;
3772 static const char * const wnames[] =
3773 {"LowSpare", "Temperature", "Reliability", "R/O", "VolMemBackup"};
3774
3775 for (unsigned b = 0, cnt = 0; b < 8 ; b++) {
3776 if (!(w & (1 << b)))
3777 continue;
3778 if (cnt)
3779 msg += ", ";
3780 if (++cnt > 3) {
3781 msg += "..."; break;
3782 }
3783 if (b >= sizeof(wnames)/sizeof(wnames[0])) {
3784 msg += "*Unknown*"; break;
3785 }
3786 msg += wnames[b];
3787 }
3788
3789 PrintOut(LOG_CRIT, "Device: %s, Critical Warning (0x%02x): %s\n", name, w, msg.c_str());
3790 MailWarning(cfg, state, 1, "Device: %s, Critical Warning (0x%02x): %s", name, w, msg.c_str());
3791 state.must_write = true;
3792 }
3793
3794 // Check temperature limits
3795 if (cfg.tempdiff || cfg.tempinfo || cfg.tempcrit) {
3796 int k = nvme_get_max_temp_kelvin(smart_log);
3797 // Convert Kelvin to positive Celsius (TODO: Allow negative temperatures)
3798 int c = k - 273;
3799 if (c < 1)
3800 c = 1;
3801 else if (c > 0xff)
3802 c = 0xff;
3803 CheckTemperature(cfg, state, c, 0);
3804 }
3805
3806 // Check if number of errors has increased
3807 if (cfg.errorlog || cfg.xerrorlog) {
3808 uint64_t oldcnt = state.nvme_err_log_entries;
3809 uint64_t newcnt = le128_to_uint64(smart_log.num_err_log_entries);
3810 if (newcnt > oldcnt) {
3811 PrintOut(LOG_CRIT, "Device: %s, number of Error Log entries increased from %" PRIu64 " to %" PRIu64 "\n",
3812 name, oldcnt, newcnt);
3813 MailWarning(cfg, state, 4, "Device: %s, number of Error Log entries increased from %" PRIu64 " to %" PRIu64,
3814 name, oldcnt, newcnt);
3815 state.must_write = true;
3816 }
3817 state.nvme_err_log_entries = newcnt;
3818 }
3819
3820 CloseDevice(nvmedev, name);
3821 state.attrlog_dirty = true;
3822 return 0;
3823 }
3824
3825 // 0=not used, 1=not disabled, 2=disable rejected by OS, 3=disabled
3826 static int standby_disable_state = 0;
3827
init_disable_standby_check(dev_config_vector & configs)3828 static void init_disable_standby_check(dev_config_vector & configs)
3829 {
3830 // Check for '-l offlinests,ns' or '-l selfteststs,ns' directives
3831 bool sts1 = false, sts2 = false;
3832 for (unsigned i = 0; i < configs.size() && !(sts1 || sts2); i++) {
3833 const dev_config & cfg = configs.at(i);
3834 if (cfg.offlinests_ns)
3835 sts1 = true;
3836 if (cfg.selfteststs_ns)
3837 sts2 = true;
3838 }
3839
3840 // Check for support of disable auto standby
3841 // Reenable standby if smartd.conf was reread
3842 if (sts1 || sts2 || standby_disable_state == 3) {
3843 if (!smi()->disable_system_auto_standby(false)) {
3844 if (standby_disable_state == 3)
3845 PrintOut(LOG_CRIT, "System auto standby enable failed: %s\n", smi()->get_errmsg());
3846 if (sts1 || sts2) {
3847 PrintOut(LOG_INFO, "Disable auto standby not supported, ignoring ',ns' from %s%s%s\n",
3848 (sts1 ? "-l offlinests,ns" : ""), (sts1 && sts2 ? " and " : ""), (sts2 ? "-l selfteststs,ns" : ""));
3849 sts1 = sts2 = false;
3850 }
3851 }
3852 }
3853
3854 standby_disable_state = (sts1 || sts2 ? 1 : 0);
3855 }
3856
do_disable_standby_check(const dev_config_vector & configs,const dev_state_vector & states)3857 static void do_disable_standby_check(const dev_config_vector & configs, const dev_state_vector & states)
3858 {
3859 if (!standby_disable_state)
3860 return;
3861
3862 // Check for just started or still running self-tests
3863 bool running = false;
3864 for (unsigned i = 0; i < configs.size() && !running; i++) {
3865 const dev_config & cfg = configs.at(i); const dev_state & state = states.at(i);
3866
3867 if ( ( cfg.offlinests_ns
3868 && (state.offline_started ||
3869 is_offl_coll_in_progress(state.smartval.offline_data_collection_status)))
3870 || ( cfg.selfteststs_ns
3871 && (state.selftest_started ||
3872 is_self_test_in_progress(state.smartval.self_test_exec_status))) )
3873 running = true;
3874 // state.offline/selftest_started will be reset after next logging of test status
3875 }
3876
3877 // Disable/enable auto standby and log state changes
3878 if (!running) {
3879 if (standby_disable_state != 1) {
3880 if (!smi()->disable_system_auto_standby(false))
3881 PrintOut(LOG_CRIT, "Self-test(s) completed, system auto standby enable failed: %s\n",
3882 smi()->get_errmsg());
3883 else
3884 PrintOut(LOG_INFO, "Self-test(s) completed, system auto standby enabled\n");
3885 standby_disable_state = 1;
3886 }
3887 }
3888 else if (!smi()->disable_system_auto_standby(true)) {
3889 if (standby_disable_state != 2) {
3890 PrintOut(LOG_INFO, "Self-test(s) in progress, system auto standby disable rejected: %s\n",
3891 smi()->get_errmsg());
3892 standby_disable_state = 2;
3893 }
3894 }
3895 else {
3896 if (standby_disable_state != 3) {
3897 PrintOut(LOG_INFO, "Self-test(s) in progress, system auto standby disabled\n");
3898 standby_disable_state = 3;
3899 }
3900 }
3901 }
3902
3903 // Checks the SMART status of all ATA and SCSI devices
CheckDevicesOnce(const dev_config_vector & configs,dev_state_vector & states,smart_device_list & devices,bool firstpass,bool allow_selftests)3904 static void CheckDevicesOnce(const dev_config_vector & configs, dev_state_vector & states,
3905 smart_device_list & devices, bool firstpass, bool allow_selftests)
3906 {
3907 for (unsigned i = 0; i < configs.size(); i++) {
3908 const dev_config & cfg = configs.at(i);
3909 dev_state & state = states.at(i);
3910 smart_device * dev = devices.at(i);
3911 if (dev->is_ata())
3912 ATACheckDevice(cfg, state, dev->to_ata(), firstpass, allow_selftests);
3913 else if (dev->is_scsi())
3914 SCSICheckDevice(cfg, state, dev->to_scsi(), allow_selftests);
3915 else if (dev->is_nvme())
3916 NVMeCheckDevice(cfg, state, dev->to_nvme());
3917 }
3918
3919 do_disable_standby_check(configs, states);
3920 }
3921
3922 // Install all signal handlers
install_signal_handlers()3923 static void install_signal_handlers()
3924 {
3925 // normal and abnormal exit
3926 set_signal_if_not_ignored(SIGTERM, sighandler);
3927 set_signal_if_not_ignored(SIGQUIT, sighandler);
3928
3929 // in debug mode, <CONTROL-C> ==> HUP
3930 set_signal_if_not_ignored(SIGINT, (debugmode ? HUPhandler : sighandler));
3931
3932 // Catch HUP and USR1
3933 set_signal_if_not_ignored(SIGHUP, HUPhandler);
3934 set_signal_if_not_ignored(SIGUSR1, USR1handler);
3935 #ifdef _WIN32
3936 set_signal_if_not_ignored(SIGUSR2, USR2handler);
3937 #endif
3938 }
3939
3940 #ifdef _WIN32
3941 // Toggle debug mode implemented for native windows only
3942 // (there is no easy way to reopen tty on *nix)
ToggleDebugMode()3943 static void ToggleDebugMode()
3944 {
3945 if (!debugmode) {
3946 PrintOut(LOG_INFO,"Signal USR2 - enabling debug mode\n");
3947 if (!daemon_enable_console("smartd [Debug]")) {
3948 debugmode = 1;
3949 daemon_signal(SIGINT, HUPhandler);
3950 PrintOut(LOG_INFO,"smartd debug mode enabled, PID=%d\n", getpid());
3951 }
3952 else
3953 PrintOut(LOG_INFO,"enable console failed\n");
3954 }
3955 else if (debugmode == 1) {
3956 daemon_disable_console();
3957 debugmode = 0;
3958 daemon_signal(SIGINT, sighandler);
3959 PrintOut(LOG_INFO,"Signal USR2 - debug mode disabled\n");
3960 }
3961 else
3962 PrintOut(LOG_INFO,"Signal USR2 - debug mode %d not changed\n", debugmode);
3963 }
3964 #endif
3965
dosleep(time_t wakeuptime,bool & sigwakeup,int numdev)3966 static time_t dosleep(time_t wakeuptime, bool & sigwakeup, int numdev)
3967 {
3968 // If past wake-up-time, compute next wake-up-time
3969 time_t timenow=time(NULL);
3970 while (wakeuptime<=timenow){
3971 time_t intervals = 1 + (timenow-wakeuptime)/checktime;
3972 wakeuptime+=intervals*checktime;
3973 }
3974
3975 notify_wait(wakeuptime, numdev);
3976
3977 // sleep until we catch SIGUSR1 or have completed sleeping
3978 int addtime = 0;
3979 while (timenow < wakeuptime+addtime && !caughtsigUSR1 && !caughtsigHUP && !caughtsigEXIT) {
3980
3981 // protect user again system clock being adjusted backwards
3982 if (wakeuptime>timenow+checktime){
3983 PrintOut(LOG_INFO, "System clock time adjusted to the past. Resetting next wakeup time.\n");
3984 wakeuptime=timenow+checktime;
3985 }
3986
3987 // Exit sleep when time interval has expired or a signal is received
3988 sleep(wakeuptime+addtime-timenow);
3989
3990 #ifdef _WIN32
3991 // toggle debug mode?
3992 if (caughtsigUSR2) {
3993 ToggleDebugMode();
3994 caughtsigUSR2 = 0;
3995 }
3996 #endif
3997
3998 timenow=time(NULL);
3999
4000 // Actual sleep time too long?
4001 if (!addtime && timenow > wakeuptime+60) {
4002 if (debugmode)
4003 PrintOut(LOG_INFO, "Sleep time was %d seconds too long, assuming wakeup from standby mode.\n",
4004 (int)(timenow-wakeuptime));
4005 // Wait another 20 seconds to avoid I/O errors during disk spin-up
4006 addtime = timenow-wakeuptime+20;
4007 // Use next wake-up-time if close
4008 int nextcheck = checktime - addtime % checktime;
4009 if (nextcheck <= 20)
4010 addtime += nextcheck;
4011 }
4012 }
4013
4014 // if we caught a SIGUSR1 then print message and clear signal
4015 if (caughtsigUSR1){
4016 PrintOut(LOG_INFO,"Signal USR1 - checking devices now rather than in %d seconds.\n",
4017 wakeuptime-timenow>0?(int)(wakeuptime-timenow):0);
4018 caughtsigUSR1=0;
4019 sigwakeup = true;
4020 }
4021
4022 // return adjusted wakeuptime
4023 return wakeuptime;
4024 }
4025
4026 // Print out a list of valid arguments for the Directive d
printoutvaliddirectiveargs(int priority,char d)4027 static void printoutvaliddirectiveargs(int priority, char d)
4028 {
4029 switch (d) {
4030 case 'n':
4031 PrintOut(priority, "never[,N][,q], sleep[,N][,q], standby[,N][,q], idle[,N][,q]");
4032 break;
4033 case 's':
4034 PrintOut(priority, "valid_regular_expression");
4035 break;
4036 case 'd':
4037 PrintOut(priority, "%s", smi()->get_valid_dev_types_str().c_str());
4038 break;
4039 case 'T':
4040 PrintOut(priority, "normal, permissive");
4041 break;
4042 case 'o':
4043 case 'S':
4044 PrintOut(priority, "on, off");
4045 break;
4046 case 'l':
4047 PrintOut(priority, "error, selftest");
4048 break;
4049 case 'M':
4050 PrintOut(priority, "\"once\", \"daily\", \"diminishing\", \"test\", \"exec\"");
4051 break;
4052 case 'v':
4053 PrintOut(priority, "\n%s\n", create_vendor_attribute_arg_list().c_str());
4054 break;
4055 case 'P':
4056 PrintOut(priority, "use, ignore, show, showall");
4057 break;
4058 case 'F':
4059 PrintOut(priority, "%s", get_valid_firmwarebug_args());
4060 break;
4061 case 'e':
4062 PrintOut(priority, "aam,[N|off], apm,[N|off], lookahead,[on|off], dsn,[on|off] "
4063 "security-freeze, standby,[N|off], wcache,[on|off]");
4064 break;
4065 }
4066 }
4067
4068 // exits with an error message, or returns integer value of token
GetInteger(const char * arg,const char * name,const char * token,int lineno,const char * cfgfile,int min,int max,char * suffix=0)4069 static int GetInteger(const char *arg, const char *name, const char *token, int lineno, const char *cfgfile,
4070 int min, int max, char * suffix = 0)
4071 {
4072 // make sure argument is there
4073 if (!arg) {
4074 PrintOut(LOG_CRIT,"File %s line %d (drive %s): Directive: %s takes integer argument from %d to %d.\n",
4075 cfgfile, lineno, name, token, min, max);
4076 return -1;
4077 }
4078
4079 // get argument value (base 10), check that it's integer, and in-range
4080 char *endptr;
4081 int val = strtol(arg,&endptr,10);
4082
4083 // optional suffix present?
4084 if (suffix) {
4085 if (!strcmp(endptr, suffix))
4086 endptr += strlen(suffix);
4087 else
4088 *suffix = 0;
4089 }
4090
4091 if (!(!*endptr && min <= val && val <= max)) {
4092 PrintOut(LOG_CRIT,"File %s line %d (drive %s): Directive: %s has argument: %s; needs integer from %d to %d.\n",
4093 cfgfile, lineno, name, token, arg, min, max);
4094 return -1;
4095 }
4096
4097 // all is well; return value
4098 return val;
4099 }
4100
4101
4102 // Get 1-3 small integer(s) for '-W' directive
Get3Integers(const char * arg,const char * name,const char * token,int lineno,const char * cfgfile,unsigned char * val1,unsigned char * val2,unsigned char * val3)4103 static int Get3Integers(const char *arg, const char *name, const char *token, int lineno, const char *cfgfile,
4104 unsigned char *val1, unsigned char *val2, unsigned char *val3)
4105 {
4106 unsigned v1 = 0, v2 = 0, v3 = 0;
4107 int n1 = -1, n2 = -1, n3 = -1, len;
4108 if (!arg) {
4109 PrintOut(LOG_CRIT,"File %s line %d (drive %s): Directive: %s takes 1-3 integer argument(s) from 0 to 255.\n",
4110 cfgfile, lineno, name, token);
4111 return -1;
4112 }
4113
4114 len = strlen(arg);
4115 if (!( sscanf(arg, "%u%n,%u%n,%u%n", &v1, &n1, &v2, &n2, &v3, &n3) >= 1
4116 && (n1 == len || n2 == len || n3 == len) && v1 <= 255 && v2 <= 255 && v3 <= 255)) {
4117 PrintOut(LOG_CRIT,"File %s line %d (drive %s): Directive: %s has argument: %s; needs 1-3 integer(s) from 0 to 255.\n",
4118 cfgfile, lineno, name, token, arg);
4119 return -1;
4120 }
4121 *val1 = (unsigned char)v1; *val2 = (unsigned char)v2; *val3 = (unsigned char)v3;
4122 return 0;
4123 }
4124
4125
4126 #ifdef _WIN32
4127
// Concatenate strtok() results if quoted with "..."
//
// Continues the strtok() scan started by the caller and returns the
// next token.  If the token starts with a double quote, the following
// tokens are joined with single spaces until a token ends with a
// double quote; both quotes are removed.  A quoted single word
// ("word") is handled too.
//
// Returns null if no token remains, or the string "\"" if the closing
// quote is missing.  A dequoted result points to a static buffer which
// is overwritten by the next call.
static const char * strtok_dequote(const char * delimiters)
{
  const char * t = strtok(0, delimiters);
  if (!t || t[0] != '"')
    return t;

  static std::string token;
  token = t+1;

  // Closing quote in the same token: nothing to concatenate
  if (!token.empty() && token[token.size()-1] == '"') {
    token.erase(token.size()-1);
    return token.c_str();
  }

  for (;;) {
    t = strtok(0, delimiters);
    if (!t || !*t)
      return "\""; // missing closing quote
    token += ' ';
    const size_t len = strlen(t);
    if (t[len-1] == '"') {
      token.append(t, len-1);
      break;
    }
    token += t;
  }
  return token.c_str();
}
4151
4152 #endif // _WIN32
4153
4154
4155 // This function returns 1 if it has correctly parsed one token (and
4156 // any arguments), else zero if no tokens remain. It returns -1 if an
4157 // error was encountered.
ParseToken(char * token,dev_config & cfg,smart_devtype_list & scan_types)4158 static int ParseToken(char * token, dev_config & cfg, smart_devtype_list & scan_types)
4159 {
4160 char sym;
4161 const char * name = cfg.name.c_str();
4162 int lineno=cfg.lineno;
4163 const char *delim = " \n\t";
4164 int badarg = 0;
4165 int missingarg = 0;
4166 const char *arg = 0;
4167
4168 // is the rest of the line a comment
4169 if (*token=='#')
4170 return 1;
4171
4172 // is the token not recognized?
4173 if (*token!='-' || strlen(token)!=2) {
4174 PrintOut(LOG_CRIT,"File %s line %d (drive %s): unknown Directive: %s\n",
4175 configfile, lineno, name, token);
4176 PrintOut(LOG_CRIT, "Run smartd -D to print a list of valid Directives.\n");
4177 return -1;
4178 }
4179
4180 // token we will be parsing:
4181 sym=token[1];
4182
4183 // parse the token and swallow its argument
4184 int val;
4185 char plus[] = "+", excl[] = "!";
4186
4187 switch (sym) {
4188 case 'C':
4189 // monitor current pending sector count (default 197)
4190 if ((val = GetInteger(arg=strtok(NULL,delim), name, token, lineno, configfile, 0, 255, plus)) < 0)
4191 return -1;
4192 cfg.curr_pending_id = (unsigned char)val;
4193 cfg.curr_pending_incr = (*plus == '+');
4194 cfg.curr_pending_set = true;
4195 break;
4196 case 'U':
4197 // monitor offline uncorrectable sectors (default 198)
4198 if ((val = GetInteger(arg=strtok(NULL,delim), name, token, lineno, configfile, 0, 255, plus)) < 0)
4199 return -1;
4200 cfg.offl_pending_id = (unsigned char)val;
4201 cfg.offl_pending_incr = (*plus == '+');
4202 cfg.offl_pending_set = true;
4203 break;
4204 case 'T':
4205 // Set tolerance level for SMART command failures
4206 if ((arg = strtok(NULL, delim)) == NULL) {
4207 missingarg = 1;
4208 } else if (!strcmp(arg, "normal")) {
4209 // Normal mode: exit on failure of a mandatory S.M.A.R.T. command, but
4210 // not on failure of an optional S.M.A.R.T. command.
4211 // This is the default so we don't need to actually do anything here.
4212 cfg.permissive = false;
4213 } else if (!strcmp(arg, "permissive")) {
4214 // Permissive mode; ignore errors from Mandatory SMART commands
4215 cfg.permissive = true;
4216 } else {
4217 badarg = 1;
4218 }
4219 break;
4220 case 'd':
4221 // specify the device type
4222 if ((arg = strtok(NULL, delim)) == NULL) {
4223 missingarg = 1;
4224 } else if (!strcmp(arg, "ignore")) {
4225 cfg.ignore = true;
4226 } else if (!strcmp(arg, "removable")) {
4227 cfg.removable = true;
4228 } else if (!strcmp(arg, "auto")) {
4229 cfg.dev_type = "";
4230 scan_types.clear();
4231 } else {
4232 cfg.dev_type = arg;
4233 scan_types.push_back(arg);
4234 }
4235 break;
4236 case 'F':
4237 // fix firmware bug
4238 if (!(arg = strtok(0, delim)))
4239 missingarg = 1;
4240 else if (!parse_firmwarebug_def(arg, cfg.firmwarebugs))
4241 badarg = 1;
4242 break;
4243 case 'H':
4244 // check SMART status
4245 cfg.smartcheck = true;
4246 break;
4247 case 'f':
4248 // check for failure of usage attributes
4249 cfg.usagefailed = true;
4250 break;
4251 case 't':
4252 // track changes in all vendor attributes
4253 cfg.prefail = true;
4254 cfg.usage = true;
4255 break;
4256 case 'p':
4257 // track changes in prefail vendor attributes
4258 cfg.prefail = true;
4259 break;
4260 case 'u':
4261 // track changes in usage vendor attributes
4262 cfg.usage = true;
4263 break;
4264 case 'l':
4265 // track changes in SMART logs
4266 if ((arg = strtok(NULL, delim)) == NULL) {
4267 missingarg = 1;
4268 } else if (!strcmp(arg, "selftest")) {
4269 // track changes in self-test log
4270 cfg.selftest = true;
4271 } else if (!strcmp(arg, "error")) {
4272 // track changes in ATA error log
4273 cfg.errorlog = true;
4274 } else if (!strcmp(arg, "xerror")) {
4275 // track changes in Extended Comprehensive SMART error log
4276 cfg.xerrorlog = true;
4277 } else if (!strcmp(arg, "offlinests")) {
4278 // track changes in offline data collection status
4279 cfg.offlinests = true;
4280 } else if (!strcmp(arg, "offlinests,ns")) {
4281 // track changes in offline data collection status, disable auto standby
4282 cfg.offlinests = cfg.offlinests_ns = true;
4283 } else if (!strcmp(arg, "selfteststs")) {
4284 // track changes in self-test execution status
4285 cfg.selfteststs = true;
4286 } else if (!strcmp(arg, "selfteststs,ns")) {
4287 // track changes in self-test execution status, disable auto standby
4288 cfg.selfteststs = cfg.selfteststs_ns = true;
4289 } else if (!strncmp(arg, "scterc,", sizeof("scterc,")-1)) {
4290 // set SCT Error Recovery Control
4291 unsigned rt = ~0, wt = ~0; int nc = -1;
4292 sscanf(arg,"scterc,%u,%u%n", &rt, &wt, &nc);
4293 if (nc == (int)strlen(arg) && rt <= 999 && wt <= 999) {
4294 cfg.sct_erc_set = true;
4295 cfg.sct_erc_readtime = rt;
4296 cfg.sct_erc_writetime = wt;
4297 }
4298 else
4299 badarg = 1;
4300 } else {
4301 badarg = 1;
4302 }
4303 break;
4304 case 'a':
4305 // monitor everything
4306 cfg.smartcheck = true;
4307 cfg.prefail = true;
4308 cfg.usagefailed = true;
4309 cfg.usage = true;
4310 cfg.selftest = true;
4311 cfg.errorlog = true;
4312 cfg.selfteststs = true;
4313 break;
4314 case 'o':
4315 // automatic offline testing enable/disable
4316 if ((arg = strtok(NULL, delim)) == NULL) {
4317 missingarg = 1;
4318 } else if (!strcmp(arg, "on")) {
4319 cfg.autoofflinetest = 2;
4320 } else if (!strcmp(arg, "off")) {
4321 cfg.autoofflinetest = 1;
4322 } else {
4323 badarg = 1;
4324 }
4325 break;
4326 case 'n':
4327 // skip disk check if in idle or standby mode
4328 if (!(arg = strtok(NULL, delim)))
4329 missingarg = 1;
4330 else {
4331 char *endptr = NULL;
4332 char *next = strchr(const_cast<char*>(arg), ',');
4333
4334 cfg.powerquiet = false;
4335 cfg.powerskipmax = 0;
4336
4337 if (next!=NULL) *next='\0';
4338 if (!strcmp(arg, "never"))
4339 cfg.powermode = 0;
4340 else if (!strcmp(arg, "sleep"))
4341 cfg.powermode = 1;
4342 else if (!strcmp(arg, "standby"))
4343 cfg.powermode = 2;
4344 else if (!strcmp(arg, "idle"))
4345 cfg.powermode = 3;
4346 else
4347 badarg = 1;
4348
4349 // if optional arguments are present
4350 if (!badarg && next!=NULL) {
4351 next++;
4352 cfg.powerskipmax = strtol(next, &endptr, 10);
4353 if (endptr == next)
4354 cfg.powerskipmax = 0;
4355 else {
4356 next = endptr + (*endptr != '\0');
4357 if (cfg.powerskipmax <= 0)
4358 badarg = 1;
4359 }
4360 if (*next != '\0') {
4361 if (!strcmp("q", next))
4362 cfg.powerquiet = true;
4363 else {
4364 badarg = 1;
4365 }
4366 }
4367 }
4368 }
4369 break;
4370 case 'S':
4371 // automatic attribute autosave enable/disable
4372 if ((arg = strtok(NULL, delim)) == NULL) {
4373 missingarg = 1;
4374 } else if (!strcmp(arg, "on")) {
4375 cfg.autosave = 2;
4376 } else if (!strcmp(arg, "off")) {
4377 cfg.autosave = 1;
4378 } else {
4379 badarg = 1;
4380 }
4381 break;
4382 case 's':
4383 // warn user, and delete any previously given -s REGEXP Directives
4384 if (!cfg.test_regex.empty()){
4385 PrintOut(LOG_INFO, "File %s line %d (drive %s): ignoring previous Test Directive -s %s\n",
4386 configfile, lineno, name, cfg.test_regex.get_pattern());
4387 cfg.test_regex = regular_expression();
4388 }
4389 // check for missing argument
4390 if (!(arg = strtok(NULL, delim))) {
4391 missingarg = 1;
4392 }
4393 // Compile regex
4394 else {
4395 if (!cfg.test_regex.compile(arg)) {
4396 // not a valid regular expression!
4397 PrintOut(LOG_CRIT, "File %s line %d (drive %s): -s argument \"%s\" is INVALID extended regular expression. %s.\n",
4398 configfile, lineno, name, arg, cfg.test_regex.get_errmsg());
4399 return -1;
4400 }
4401 // Do a bit of sanity checking and warn user if we think that
4402 // their regexp is "strange". User probably confused about shell
4403 // glob(3) syntax versus regular expression syntax regexp(7).
4404 // Check also for possible invalid number of digits in ':NNN[-LLL]' suffix.
4405 static const regular_expression syntax_check(
4406 "[^]$()*+./:?^[|0-9LSCOncr-]+|"
4407 ":[0-9]{0,2}($|[^0-9])|:[0-9]{4,}|"
4408 ":[0-9]{3}-(000|[0-9]{0,2}($|[^0-9])|[0-9]{4,})"
4409 );
4410 regular_expression::match_range range;
4411 if (syntax_check.execute(arg, 1, &range) && 0 <= range.rm_so && range.rm_so < range.rm_eo)
4412 PrintOut(LOG_INFO, "File %s line %d (drive %s): warning, \"%.*s\" looks odd in "
4413 "extended regular expression \"%s\"\n",
4414 configfile, lineno, name, (int)(range.rm_eo - range.rm_so), arg + range.rm_so, arg);
4415 }
4416 break;
4417 case 'm':
4418 // send email to address that follows
4419 if (!(arg = strtok(NULL,delim)))
4420 missingarg = 1;
4421 else {
4422 if (!cfg.emailaddress.empty())
4423 PrintOut(LOG_INFO, "File %s line %d (drive %s): ignoring previous Address Directive -m %s\n",
4424 configfile, lineno, name, cfg.emailaddress.c_str());
4425 cfg.emailaddress = arg;
4426 }
4427 break;
4428 case 'M':
4429 // email warning options
4430 if (!(arg = strtok(NULL, delim)))
4431 missingarg = 1;
4432 else if (!strcmp(arg, "once"))
4433 cfg.emailfreq = 1;
4434 else if (!strcmp(arg, "daily"))
4435 cfg.emailfreq = 2;
4436 else if (!strcmp(arg, "diminishing"))
4437 cfg.emailfreq = 3;
4438 else if (!strcmp(arg, "test"))
4439 cfg.emailtest = 1;
4440 else if (!strcmp(arg, "exec")) {
4441 // Get the next argument (the command line)
4442 #ifdef _WIN32
4443 // Allow "/path name/with spaces/..." on Windows
4444 arg = strtok_dequote(delim);
4445 if (arg && arg[0] == '"') {
4446 PrintOut(LOG_CRIT, "File %s line %d (drive %s): Directive %s 'exec' argument: missing closing quote\n",
4447 configfile, lineno, name, token);
4448 return -1;
4449 }
4450 #else
4451 arg = strtok(0, delim);
4452 #endif
4453 if (!arg) {
4454 PrintOut(LOG_CRIT, "File %s line %d (drive %s): Directive %s 'exec' argument must be followed by executable path.\n",
4455 configfile, lineno, name, token);
4456 return -1;
4457 }
4458 // Free the last cmd line given if any, and copy new one
4459 if (!cfg.emailcmdline.empty())
4460 PrintOut(LOG_INFO, "File %s line %d (drive %s): ignoring previous mail Directive -M exec %s\n",
4461 configfile, lineno, name, cfg.emailcmdline.c_str());
4462 cfg.emailcmdline = arg;
4463 }
4464 else
4465 badarg = 1;
4466 break;
4467 case 'i':
4468 // ignore failure of usage attribute
4469 if ((val=GetInteger(arg=strtok(NULL,delim), name, token, lineno, configfile, 1, 255))<0)
4470 return -1;
4471 cfg.monitor_attr_flags.set(val, MONITOR_IGN_FAILUSE);
4472 break;
4473 case 'I':
4474 // ignore attribute for tracking purposes
4475 if ((val=GetInteger(arg=strtok(NULL,delim), name, token, lineno, configfile, 1, 255))<0)
4476 return -1;
4477 cfg.monitor_attr_flags.set(val, MONITOR_IGNORE);
4478 break;
4479 case 'r':
4480 // print raw value when tracking
4481 if ((val = GetInteger(arg=strtok(NULL,delim), name, token, lineno, configfile, 1, 255, excl)) < 0)
4482 return -1;
4483 cfg.monitor_attr_flags.set(val, MONITOR_RAW_PRINT);
4484 if (*excl == '!') // attribute change is critical
4485 cfg.monitor_attr_flags.set(val, MONITOR_AS_CRIT);
4486 break;
4487 case 'R':
4488 // track changes in raw value (forces printing of raw value)
4489 if ((val = GetInteger(arg=strtok(NULL,delim), name, token, lineno, configfile, 1, 255, excl)) < 0)
4490 return -1;
4491 cfg.monitor_attr_flags.set(val, MONITOR_RAW_PRINT|MONITOR_RAW);
4492 if (*excl == '!') // raw value change is critical
4493 cfg.monitor_attr_flags.set(val, MONITOR_RAW_AS_CRIT);
4494 break;
4495 case 'W':
4496 // track Temperature
4497 if (Get3Integers(arg=strtok(NULL, delim), name, token, lineno, configfile,
4498 &cfg.tempdiff, &cfg.tempinfo, &cfg.tempcrit) < 0)
4499 return -1;
4500 break;
4501 case 'v':
4502 // non-default vendor-specific attribute meaning
4503 if (!(arg=strtok(NULL,delim))) {
4504 missingarg = 1;
4505 } else if (!parse_attribute_def(arg, cfg.attribute_defs, PRIOR_USER)) {
4506 badarg = 1;
4507 }
4508 break;
4509 case 'P':
4510 // Define use of drive-specific presets.
4511 if (!(arg = strtok(NULL, delim))) {
4512 missingarg = 1;
4513 } else if (!strcmp(arg, "use")) {
4514 cfg.ignorepresets = false;
4515 } else if (!strcmp(arg, "ignore")) {
4516 cfg.ignorepresets = true;
4517 } else if (!strcmp(arg, "show")) {
4518 cfg.showpresets = true;
4519 } else if (!strcmp(arg, "showall")) {
4520 showallpresets();
4521 } else {
4522 badarg = 1;
4523 }
4524 break;
4525
4526 case 'e':
4527 // Various ATA settings
4528 if (!(arg = strtok(NULL, delim))) {
4529 missingarg = true;
4530 }
4531 else {
4532 char arg2[16+1]; unsigned val;
4533 int n1 = -1, n2 = -1, n3 = -1, len = strlen(arg);
4534 if (sscanf(arg, "%16[^,=]%n%*[,=]%n%u%n", arg2, &n1, &n2, &val, &n3) >= 1
4535 && (n1 == len || n2 > 0)) {
4536 bool on = (n2 > 0 && !strcmp(arg+n2, "on"));
4537 bool off = (n2 > 0 && !strcmp(arg+n2, "off"));
4538 if (n3 != len)
4539 val = ~0U;
4540
4541 if (!strcmp(arg2, "aam")) {
4542 if (off)
4543 cfg.set_aam = -1;
4544 else if (val <= 254)
4545 cfg.set_aam = val + 1;
4546 else
4547 badarg = true;
4548 }
4549 else if (!strcmp(arg2, "apm")) {
4550 if (off)
4551 cfg.set_apm = -1;
4552 else if (1 <= val && val <= 254)
4553 cfg.set_apm = val + 1;
4554 else
4555 badarg = true;
4556 }
4557 else if (!strcmp(arg2, "lookahead")) {
4558 if (off)
4559 cfg.set_lookahead = -1;
4560 else if (on)
4561 cfg.set_lookahead = 1;
4562 else
4563 badarg = true;
4564 }
4565 else if (!strcmp(arg, "security-freeze")) {
4566 cfg.set_security_freeze = true;
4567 }
4568 else if (!strcmp(arg2, "standby")) {
4569 if (off)
4570 cfg.set_standby = 0 + 1;
4571 else if (val <= 255)
4572 cfg.set_standby = val + 1;
4573 else
4574 badarg = true;
4575 }
4576 else if (!strcmp(arg2, "wcache")) {
4577 if (off)
4578 cfg.set_wcache = -1;
4579 else if (on)
4580 cfg.set_wcache = 1;
4581 else
4582 badarg = true;
4583 }
4584 else if (!strcmp(arg2, "dsn")) {
4585 if (off)
4586 cfg.set_dsn = -1;
4587 else if (on)
4588 cfg.set_dsn = 1;
4589 else
4590 badarg = true;
4591 }
4592 else
4593 badarg = true;
4594 }
4595 else
4596 badarg = true;
4597 }
4598 break;
4599
4600 default:
4601 // Directive not recognized
4602 PrintOut(LOG_CRIT,"File %s line %d (drive %s): unknown Directive: %s\n",
4603 configfile, lineno, name, token);
4604 Directives();
4605 return -1;
4606 }
4607 if (missingarg) {
4608 PrintOut(LOG_CRIT, "File %s line %d (drive %s): Missing argument to %s Directive\n",
4609 configfile, lineno, name, token);
4610 }
4611 if (badarg) {
4612 PrintOut(LOG_CRIT, "File %s line %d (drive %s): Invalid argument to %s Directive: %s\n",
4613 configfile, lineno, name, token, arg);
4614 }
4615 if (missingarg || badarg) {
4616 PrintOut(LOG_CRIT, "Valid arguments to %s Directive are: ", token);
4617 printoutvaliddirectiveargs(LOG_CRIT, sym);
4618 PrintOut(LOG_CRIT, "\n");
4619 return -1;
4620 }
4621
4622 return 1;
4623 }
4624
4625 // Device name in the configuration file that requests a device scan
4626 #define SCANDIRECTIVE "DEVICESCAN"
4627
4628 // This is the routine that adds things to the conf_entries list.
4629 //
4630 // Return values are:
4631 // 1: parsed a normal line
4632 // 0: found DEFAULT setting or comment or blank line
4633 // -1: found SCANDIRECTIVE line
4634 // -2: found an error
4635 //
4636 // Note: this routine modifies *line from the caller!
ParseConfigLine(dev_config_vector & conf_entries,dev_config & default_conf,smart_devtype_list & scan_types,int lineno,char * line)4637 static int ParseConfigLine(dev_config_vector & conf_entries, dev_config & default_conf,
4638 smart_devtype_list & scan_types, int lineno, /*const*/ char * line)
4639 {
4640 const char *delim = " \n\t";
4641
4642 // get first token: device name. If a comment, skip line
4643 const char * name = strtok(line, delim);
4644 if (!name || *name == '#')
4645 return 0;
4646
4647 // Check device name for DEFAULT or DEVICESCAN
4648 int retval;
4649 if (!strcmp("DEFAULT", name)) {
4650 retval = 0;
4651 // Restart with empty defaults
4652 default_conf = dev_config();
4653 }
4654 else {
4655 retval = (!strcmp(SCANDIRECTIVE, name) ? -1 : 1);
4656 // Init new entry with current defaults
4657 conf_entries.push_back(default_conf);
4658 }
4659 dev_config & cfg = (retval ? conf_entries.back() : default_conf);
4660
4661 cfg.name = name; // Later replaced by dev->get_info().info_name
4662 cfg.dev_name = name; // If DEVICESCAN later replaced by get->dev_info().dev_name
4663 cfg.lineno = lineno;
4664
4665 // parse tokens one at a time from the file.
4666 while (char * token = strtok(0, delim)) {
4667 int rc = ParseToken(token, cfg, scan_types);
4668 if (rc < 0)
4669 // error found on the line
4670 return -2;
4671
4672 if (rc == 0)
4673 // No tokens left
4674 break;
4675
4676 // PrintOut(LOG_INFO,"Parsed token %s\n",token);
4677 }
4678
4679 // Check for multiple -d TYPE directives
4680 if (retval != -1 && scan_types.size() > 1) {
4681 PrintOut(LOG_CRIT, "Drive: %s, invalid multiple -d TYPE Directives on line %d of file %s\n",
4682 cfg.name.c_str(), cfg.lineno, configfile);
4683 return -2;
4684 }
4685
4686 // Don't perform checks below for DEFAULT entries
4687 if (retval == 0)
4688 return retval;
4689
4690 // If NO monitoring directives are set, then set all of them.
4691 if (!( cfg.smartcheck || cfg.selftest
4692 || cfg.errorlog || cfg.xerrorlog
4693 || cfg.offlinests || cfg.selfteststs
4694 || cfg.usagefailed || cfg.prefail || cfg.usage
4695 || cfg.tempdiff || cfg.tempinfo || cfg.tempcrit)) {
4696
4697 PrintOut(LOG_INFO,"Drive: %s, implied '-a' Directive on line %d of file %s\n",
4698 cfg.name.c_str(), cfg.lineno, configfile);
4699
4700 cfg.smartcheck = true;
4701 cfg.usagefailed = true;
4702 cfg.prefail = true;
4703 cfg.usage = true;
4704 cfg.selftest = true;
4705 cfg.errorlog = true;
4706 cfg.selfteststs = true;
4707 }
4708
4709 // additional sanity check. Has user set -M options without -m?
4710 if (cfg.emailaddress.empty() && (!cfg.emailcmdline.empty() || cfg.emailfreq || cfg.emailtest)){
4711 PrintOut(LOG_CRIT,"Drive: %s, -M Directive(s) on line %d of file %s need -m ADDRESS Directive\n",
4712 cfg.name.c_str(), cfg.lineno, configfile);
4713 return -2;
4714 }
4715
4716 // has the user has set <nomailer>?
4717 if (cfg.emailaddress == "<nomailer>") {
4718 // check that -M exec is also set
4719 if (cfg.emailcmdline.empty()){
4720 PrintOut(LOG_CRIT,"Drive: %s, -m <nomailer> Directive on line %d of file %s needs -M exec Directive\n",
4721 cfg.name.c_str(), cfg.lineno, configfile);
4722 return -2;
4723 }
4724 // From here on the sign of <nomailer> is cfg.emailaddress.empty() and !cfg.emailcmdline.empty()
4725 cfg.emailaddress.clear();
4726 }
4727
4728 return retval;
4729 }
4730
4731 // Parses a configuration file. Return values are:
4732 // N=>0: found N entries
4733 // -1: syntax error in config file
4734 // -2: config file does not exist
4735 // -3: config file exists but cannot be read
4736 //
4737 // In the case where the return value is 0, there are three
4738 // possibilities:
4739 // Empty configuration file ==> conf_entries.empty()
4740 // No configuration file ==> conf_entries[0].lineno == 0
4741 // SCANDIRECTIVE found ==> conf_entries.back().lineno != 0 (size >= 1)
ParseConfigFile(dev_config_vector & conf_entries,smart_devtype_list & scan_types)4742 static int ParseConfigFile(dev_config_vector & conf_entries, smart_devtype_list & scan_types)
4743 {
4744 // maximum line length in configuration file
4745 const int MAXLINELEN = 256;
4746 // maximum length of a continued line in configuration file
4747 const int MAXCONTLINE = 1023;
4748
4749 stdio_file f;
4750 // Open config file, if it exists and is not <stdin>
4751 if (!(configfile == configfile_stdin)) { // pointer comparison ok here
4752 if (!f.open(configfile,"r") && (errno!=ENOENT || !configfile_alt.empty())) {
4753 // file exists but we can't read it or it should exist due to '-c' option
4754 int ret = (errno!=ENOENT ? -3 : -2);
4755 PrintOut(LOG_CRIT,"%s: Unable to open configuration file %s\n",
4756 strerror(errno),configfile);
4757 return ret;
4758 }
4759 }
4760 else // read from stdin ('-c -' option)
4761 f.open(stdin);
4762
4763 // Start with empty defaults
4764 dev_config default_conf;
4765
4766 // No configuration file found -- use fake one
4767 int entry = 0;
4768 if (!f) {
4769 char fakeconfig[] = SCANDIRECTIVE " -a"; // TODO: Remove this hack, build cfg_entry.
4770
4771 if (ParseConfigLine(conf_entries, default_conf, scan_types, 0, fakeconfig) != -1)
4772 throw std::logic_error("Internal error parsing " SCANDIRECTIVE);
4773 return 0;
4774 }
4775
4776 #ifdef __CYGWIN__
4777 setmode(fileno(f), O_TEXT); // Allow files with \r\n
4778 #endif
4779
4780 // configuration file exists
4781 PrintOut(LOG_INFO,"Opened configuration file %s\n",configfile);
4782
4783 // parse config file line by line
4784 int lineno = 1, cont = 0, contlineno = 0;
4785 char line[MAXLINELEN+2];
4786 char fullline[MAXCONTLINE+1];
4787
4788 for (;;) {
4789 int len=0,scandevice;
4790 char *lastslash;
4791 char *comment;
4792 char *code;
4793
4794 // make debugging simpler
4795 memset(line,0,sizeof(line));
4796
4797 // get a line
4798 code=fgets(line, MAXLINELEN+2, f);
4799
4800 // are we at the end of the file?
4801 if (!code){
4802 if (cont) {
4803 scandevice = ParseConfigLine(conf_entries, default_conf, scan_types, contlineno, fullline);
4804 // See if we found a SCANDIRECTIVE directive
4805 if (scandevice==-1)
4806 return 0;
4807 // did we find a syntax error
4808 if (scandevice==-2)
4809 return -1;
4810 // the final line is part of a continuation line
4811 entry+=scandevice;
4812 }
4813 break;
4814 }
4815
4816 // input file line number
4817 contlineno++;
4818
4819 // See if line is too long
4820 len=strlen(line);
4821 if (len>MAXLINELEN){
4822 const char *warn;
4823 if (line[len-1]=='\n')
4824 warn="(including newline!) ";
4825 else
4826 warn="";
4827 PrintOut(LOG_CRIT,"Error: line %d of file %s %sis more than MAXLINELEN=%d characters.\n",
4828 (int)contlineno,configfile,warn,(int)MAXLINELEN);
4829 return -1;
4830 }
4831
4832 // Ignore anything after comment symbol
4833 if ((comment=strchr(line,'#'))){
4834 *comment='\0';
4835 len=strlen(line);
4836 }
4837
4838 // is the total line (made of all continuation lines) too long?
4839 if (cont+len>MAXCONTLINE){
4840 PrintOut(LOG_CRIT,"Error: continued line %d (actual line %d) of file %s is more than MAXCONTLINE=%d characters.\n",
4841 lineno, (int)contlineno, configfile, (int)MAXCONTLINE);
4842 return -1;
4843 }
4844
4845 // copy string so far into fullline, and increment length
4846 snprintf(fullline+cont, sizeof(fullline)-cont, "%s" ,line);
4847 cont+=len;
4848
4849 // is this a continuation line. If so, replace \ by space and look at next line
4850 if ( (lastslash=strrchr(line,'\\')) && !strtok(lastslash+1," \n\t")){
4851 *(fullline+(cont-len)+(lastslash-line))=' ';
4852 continue;
4853 }
4854
4855 // Not a continuation line. Parse it
4856 scan_types.clear();
4857 scandevice = ParseConfigLine(conf_entries, default_conf, scan_types, contlineno, fullline);
4858
4859 // did we find a scandevice directive?
4860 if (scandevice==-1)
4861 return 0;
4862 // did we find a syntax error
4863 if (scandevice==-2)
4864 return -1;
4865
4866 entry+=scandevice;
4867 lineno++;
4868 cont=0;
4869 }
4870
4871 // note -- may be zero if syntax of file OK, but no valid entries!
4872 return entry;
4873 }
4874
4875 /* Prints the message "=======> VALID ARGUMENTS ARE: <LIST> <=======\n", where
4876 <LIST> is the list of valid arguments for option opt. */
PrintValidArgs(char opt)4877 static void PrintValidArgs(char opt)
4878 {
4879 const char *s;
4880
4881 PrintOut(LOG_CRIT, "=======> VALID ARGUMENTS ARE: ");
4882 if (!(s = GetValidArgList(opt)))
4883 PrintOut(LOG_CRIT, "Error constructing argument list for option %c", opt);
4884 else
4885 PrintOut(LOG_CRIT, "%s", (char *)s);
4886 PrintOut(LOG_CRIT, " <=======\n");
4887 }
4888
4889 #ifndef _WIN32
4890 // Report error and return false if specified path is not absolute.
check_abs_path(char option,const std::string & path)4891 static bool check_abs_path(char option, const std::string & path)
4892 {
4893 if (path.empty() || path[0] == '/')
4894 return true;
4895
4896 debugmode = 1;
4897 PrintHead();
4898 PrintOut(LOG_CRIT, "=======> INVALID ARGUMENT TO -%c: %s <=======\n\n", option, path.c_str());
4899 PrintOut(LOG_CRIT, "Error: relative path names are not allowed\n\n");
4900 return false;
4901 }
4902 #endif // !_WIN32
4903
4904 // Parses input line, prints usage message and
4905 // version/license/copyright messages
parse_options(int argc,char ** argv)4906 static int parse_options(int argc, char **argv)
4907 {
4908 // Init default path names
4909 #ifndef _WIN32
4910 configfile = SMARTMONTOOLS_SYSCONFDIR "/smartd.conf";
4911 warning_script = SMARTMONTOOLS_SMARTDSCRIPTDIR "/smartd_warning.sh";
4912 #else
4913 std::string exedir = get_exe_dir();
4914 static std::string configfile_str = exedir + "/smartd.conf";
4915 configfile = configfile_str.c_str();
4916 warning_script = exedir + "/smartd_warning.cmd";
4917 #endif
4918
4919 // Please update GetValidArgList() if you edit shortopts
4920 static const char shortopts[] = "c:l:q:dDni:p:r:s:A:B:w:Vh?"
4921 #ifdef HAVE_LIBCAP_NG
4922 "C"
4923 #endif
4924 ;
4925 // Please update GetValidArgList() if you edit longopts
4926 struct option longopts[] = {
4927 { "configfile", required_argument, 0, 'c' },
4928 { "logfacility", required_argument, 0, 'l' },
4929 { "quit", required_argument, 0, 'q' },
4930 { "debug", no_argument, 0, 'd' },
4931 { "showdirectives", no_argument, 0, 'D' },
4932 { "interval", required_argument, 0, 'i' },
4933 #ifndef _WIN32
4934 { "no-fork", no_argument, 0, 'n' },
4935 #else
4936 { "service", no_argument, 0, 'n' },
4937 #endif
4938 { "pidfile", required_argument, 0, 'p' },
4939 { "report", required_argument, 0, 'r' },
4940 { "savestates", required_argument, 0, 's' },
4941 { "attributelog", required_argument, 0, 'A' },
4942 { "drivedb", required_argument, 0, 'B' },
4943 { "warnexec", required_argument, 0, 'w' },
4944 { "version", no_argument, 0, 'V' },
4945 { "license", no_argument, 0, 'V' },
4946 { "copyright", no_argument, 0, 'V' },
4947 { "help", no_argument, 0, 'h' },
4948 { "usage", no_argument, 0, 'h' },
4949 #ifdef HAVE_LIBCAP_NG
4950 { "capabilities", no_argument, 0, 'C' },
4951 #endif
4952 { 0, 0, 0, 0 }
4953 };
4954
4955 opterr=optopt=0;
4956 bool badarg = false;
4957 bool use_default_db = true; // set false on '-B FILE'
4958
4959 // Parse input options.
4960 int optchar;
4961 while ((optchar = getopt_long(argc, argv, shortopts, longopts, NULL)) != -1) {
4962 char *arg;
4963 char *tailptr;
4964 long lchecktime;
4965
4966 switch(optchar) {
4967 case 'q':
4968 // when to quit
4969 if (!strcmp(optarg, "nodev"))
4970 quit = QUIT_NODEV;
4971 else if (!strcmp(optarg, "nodevstartup"))
4972 quit = QUIT_NODEVSTARTUP;
4973 else if (!strcmp(optarg, "never"))
4974 quit = QUIT_NEVER;
4975 else if (!strcmp(optarg, "onecheck")) {
4976 quit = QUIT_ONECHECK;
4977 debugmode = 1;
4978 }
4979 else if (!strcmp(optarg, "showtests")) {
4980 quit = QUIT_SHOWTESTS;
4981 debugmode = 1;
4982 }
4983 else if (!strcmp(optarg, "errors"))
4984 quit = QUIT_ERRORS;
4985 else
4986 badarg = true;
4987 break;
4988 case 'l':
4989 // set the log facility level
4990 if (!strcmp(optarg, "daemon"))
4991 facility=LOG_DAEMON;
4992 else if (!strcmp(optarg, "local0"))
4993 facility=LOG_LOCAL0;
4994 else if (!strcmp(optarg, "local1"))
4995 facility=LOG_LOCAL1;
4996 else if (!strcmp(optarg, "local2"))
4997 facility=LOG_LOCAL2;
4998 else if (!strcmp(optarg, "local3"))
4999 facility=LOG_LOCAL3;
5000 else if (!strcmp(optarg, "local4"))
5001 facility=LOG_LOCAL4;
5002 else if (!strcmp(optarg, "local5"))
5003 facility=LOG_LOCAL5;
5004 else if (!strcmp(optarg, "local6"))
5005 facility=LOG_LOCAL6;
5006 else if (!strcmp(optarg, "local7"))
5007 facility=LOG_LOCAL7;
5008 else
5009 badarg = true;
5010 break;
5011 case 'd':
5012 // enable debug mode
5013 debugmode = 1;
5014 break;
5015 case 'n':
5016 // don't fork()
5017 #ifndef _WIN32 // On Windows, --service is already handled by daemon_main()
5018 do_fork = false;
5019 #endif
5020 break;
5021 case 'D':
5022 // print summary of all valid directives
5023 debugmode = 1;
5024 Directives();
5025 return 0;
5026 case 'i':
5027 // Period (time interval) for checking
5028 // strtol will set errno in the event of overflow, so we'll check it.
5029 errno = 0;
5030 lchecktime = strtol(optarg, &tailptr, 10);
5031 if (*tailptr != '\0' || lchecktime < 10 || lchecktime > INT_MAX || errno) {
5032 debugmode=1;
5033 PrintHead();
5034 PrintOut(LOG_CRIT, "======> INVALID INTERVAL: %s <=======\n", optarg);
5035 PrintOut(LOG_CRIT, "======> INTERVAL MUST BE INTEGER BETWEEN %d AND %d <=======\n", 10, INT_MAX);
5036 PrintOut(LOG_CRIT, "\nUse smartd -h to get a usage summary\n\n");
5037 return EXIT_BADCMD;
5038 }
5039 checktime = (int)lchecktime;
5040 break;
5041 case 'r':
5042 // report IOCTL transactions
5043 {
5044 int n1 = -1, n2 = -1, len = strlen(optarg);
5045 char s[9+1]; unsigned i = 1;
5046 sscanf(optarg, "%9[a-z]%n,%u%n", s, &n1, &i, &n2);
5047 if (!((n1 == len || n2 == len) && 1 <= i && i <= 4)) {
5048 badarg = true;
5049 } else if (!strcmp(s,"ioctl")) {
5050 ata_debugmode = scsi_debugmode = nvme_debugmode = i;
5051 } else if (!strcmp(s,"ataioctl")) {
5052 ata_debugmode = i;
5053 } else if (!strcmp(s,"scsiioctl")) {
5054 scsi_debugmode = i;
5055 } else if (!strcmp(s,"nvmeioctl")) {
5056 nvme_debugmode = i;
5057 } else {
5058 badarg = true;
5059 }
5060 }
5061 break;
5062 case 'c':
5063 // alternate configuration file
5064 if (strcmp(optarg,"-"))
5065 configfile = (configfile_alt = optarg).c_str();
5066 else // read from stdin
5067 configfile=configfile_stdin;
5068 break;
5069 case 'p':
5070 // output file with PID number
5071 pid_file = optarg;
5072 break;
5073 case 's':
5074 // path prefix of persistent state file
5075 state_path_prefix = optarg;
5076 break;
5077 case 'A':
5078 // path prefix of attribute log file
5079 attrlog_path_prefix = optarg;
5080 break;
5081 case 'B':
5082 {
5083 const char * path = optarg;
5084 if (*path == '+' && path[1])
5085 path++;
5086 else
5087 use_default_db = false;
5088 unsigned char savedebug = debugmode; debugmode = 1;
5089 if (!read_drive_database(path))
5090 return EXIT_BADCMD;
5091 debugmode = savedebug;
5092 }
5093 break;
5094 case 'w':
5095 warning_script = optarg;
5096 break;
5097 case 'V':
5098 // print version and CVS info
5099 debugmode = 1;
5100 PrintOut(LOG_INFO, "%s", format_version_info("smartd", true /*full*/).c_str());
5101 return 0;
5102 #ifdef HAVE_LIBCAP_NG
5103 case 'C':
5104 // enable capabilities
5105 capabilities_enabled = true;
5106 break;
5107 #endif
5108 case 'h':
5109 // help: print summary of command-line options
5110 debugmode=1;
5111 PrintHead();
5112 Usage();
5113 return 0;
5114 case '?':
5115 default:
5116 // unrecognized option
5117 debugmode=1;
5118 PrintHead();
5119 // Point arg to the argument in which this option was found.
5120 arg = argv[optind-1];
5121 // Check whether the option is a long option that doesn't map to -h.
5122 if (arg[1] == '-' && optchar != 'h') {
5123 // Iff optopt holds a valid option then argument must be missing.
5124 if (optopt && (strchr(shortopts, optopt) != NULL)) {
5125 PrintOut(LOG_CRIT, "=======> ARGUMENT REQUIRED FOR OPTION: %s <=======\n",arg+2);
5126 PrintValidArgs(optopt);
5127 } else {
5128 PrintOut(LOG_CRIT, "=======> UNRECOGNIZED OPTION: %s <=======\n\n",arg+2);
5129 }
5130 PrintOut(LOG_CRIT, "\nUse smartd --help to get a usage summary\n\n");
5131 return EXIT_BADCMD;
5132 }
5133 if (optopt) {
5134 // Iff optopt holds a valid option then argument must be missing.
5135 if (strchr(shortopts, optopt) != NULL){
5136 PrintOut(LOG_CRIT, "=======> ARGUMENT REQUIRED FOR OPTION: %c <=======\n",optopt);
5137 PrintValidArgs(optopt);
5138 } else {
5139 PrintOut(LOG_CRIT, "=======> UNRECOGNIZED OPTION: %c <=======\n\n",optopt);
5140 }
5141 PrintOut(LOG_CRIT, "\nUse smartd -h to get a usage summary\n\n");
5142 return EXIT_BADCMD;
5143 }
5144 Usage();
5145 return 0;
5146 }
5147
5148 // Check to see if option had an unrecognized or incorrect argument.
5149 if (badarg) {
5150 debugmode=1;
5151 PrintHead();
5152 // It would be nice to print the actual option name given by the user
5153 // here, but we just print the short form. Please fix this if you know
5154 // a clean way to do it.
5155 PrintOut(LOG_CRIT, "=======> INVALID ARGUMENT TO -%c: %s <======= \n", optchar, optarg);
5156 PrintValidArgs(optchar);
5157 PrintOut(LOG_CRIT, "\nUse smartd -h to get a usage summary\n\n");
5158 return EXIT_BADCMD;
5159 }
5160 }
5161
5162 // non-option arguments are not allowed
5163 if (argc > optind) {
5164 debugmode=1;
5165 PrintHead();
5166 PrintOut(LOG_CRIT, "=======> UNRECOGNIZED ARGUMENT: %s <=======\n\n", argv[optind]);
5167 PrintOut(LOG_CRIT, "\nUse smartd -h to get a usage summary\n\n");
5168 return EXIT_BADCMD;
5169 }
5170
5171 // no pidfile in debug mode
5172 if (debugmode && !pid_file.empty()) {
5173 debugmode=1;
5174 PrintHead();
5175 PrintOut(LOG_CRIT, "=======> INVALID CHOICE OF OPTIONS: -d and -p <======= \n\n");
5176 PrintOut(LOG_CRIT, "Error: pid file %s not written in debug (-d) mode\n\n", pid_file.c_str());
5177 return EXIT_BADCMD;
5178 }
5179
5180 #ifndef _WIN32
5181 if (!debugmode) {
5182 // absolute path names are required due to chdir('/') in daemon_init()
5183 if (!( check_abs_path('p', pid_file)
5184 && check_abs_path('s', state_path_prefix)
5185 && check_abs_path('A', attrlog_path_prefix)))
5186 return EXIT_BADCMD;
5187 }
5188 #endif
5189
5190 // Read or init drive database
5191 {
5192 unsigned char savedebug = debugmode; debugmode = 1;
5193 if (!init_drive_database(use_default_db))
5194 return EXIT_BADCMD;
5195 debugmode = savedebug;
5196 }
5197
5198 // Check option compatibility of notify support
5199 // cppcheck-suppress knownConditionTrueFalse
5200 if (!notify_post_init())
5201 return EXIT_BADCMD;
5202
5203 // print header
5204 PrintHead();
5205
5206 // No error, continue in main_worker()
5207 return -1;
5208 }
5209
5210 // Function we call if no configuration file was found or if the
5211 // SCANDIRECTIVE Directive was found. It makes entries for device
5212 // names returned by scan_smart_devices() in os_OSNAME.cpp
MakeConfigEntries(const dev_config & base_cfg,dev_config_vector & conf_entries,smart_device_list & scanned_devs,const smart_devtype_list & types)5213 static int MakeConfigEntries(const dev_config & base_cfg,
5214 dev_config_vector & conf_entries, smart_device_list & scanned_devs,
5215 const smart_devtype_list & types)
5216 {
5217 // make list of devices
5218 smart_device_list devlist;
5219 if (!smi()->scan_smart_devices(devlist, types)) {
5220 PrintOut(LOG_CRIT, "DEVICESCAN failed: %s\n", smi()->get_errmsg());
5221 return 0;
5222 }
5223
5224 // if no devices, return
5225 if (devlist.size() <= 0)
5226 return 0;
5227
5228 // add empty device slots for existing config entries
5229 while (scanned_devs.size() < conf_entries.size())
5230 scanned_devs.push_back((smart_device *)0);
5231
5232 // loop over entries to create
5233 for (unsigned i = 0; i < devlist.size(); i++) {
5234 // Move device pointer
5235 smart_device * dev = devlist.release(i);
5236 scanned_devs.push_back(dev);
5237
5238 // Append configuration and update names
5239 conf_entries.push_back(base_cfg);
5240 dev_config & cfg = conf_entries.back();
5241 cfg.name = dev->get_info().info_name;
5242 cfg.dev_name = dev->get_info().dev_name;
5243
5244 // Set type only if scanning is limited to specific types
5245 // This is later used to set SMARTD_DEVICETYPE environment variable
5246 if (!types.empty())
5247 cfg.dev_type = dev->get_info().dev_type;
5248 else // SMARTD_DEVICETYPE=auto
5249 cfg.dev_type.clear();
5250 }
5251
5252 return devlist.size();
5253 }
5254
5255 // Returns negative value (see ParseConfigFile()) if config file
5256 // had errors, else number of entries which may be zero or positive.
ReadOrMakeConfigEntries(dev_config_vector & conf_entries,smart_device_list & scanned_devs)5257 static int ReadOrMakeConfigEntries(dev_config_vector & conf_entries, smart_device_list & scanned_devs)
5258 {
5259 // parse configuration file configfile (normally /etc/smartd.conf)
5260 smart_devtype_list scan_types;
5261 int entries = ParseConfigFile(conf_entries, scan_types);
5262
5263 if (entries < 0) {
5264 // There was an error reading the configuration file.
5265 conf_entries.clear();
5266 if (entries == -1)
5267 PrintOut(LOG_CRIT, "Configuration file %s has fatal syntax errors.\n", configfile);
5268 return entries;
5269 }
5270
5271 // no error parsing config file.
5272 if (entries) {
5273 // we did not find a SCANDIRECTIVE and did find valid entries
5274 PrintOut(LOG_INFO, "Configuration file %s parsed.\n", configfile);
5275 }
5276 else if (!conf_entries.empty()) {
5277 // we found a SCANDIRECTIVE or there was no configuration file so
5278 // scan. Configuration file's last entry contains all options
5279 // that were set
5280 dev_config first = conf_entries.back();
5281 conf_entries.pop_back();
5282
5283 if (first.lineno)
5284 PrintOut(LOG_INFO,"Configuration file %s was parsed, found %s, scanning devices\n", configfile, SCANDIRECTIVE);
5285 else
5286 PrintOut(LOG_INFO,"No configuration file %s found, scanning devices\n", configfile);
5287
5288 // make config list of devices to search for
5289 MakeConfigEntries(first, conf_entries, scanned_devs, scan_types);
5290
5291 // warn user if scan table found no devices
5292 if (conf_entries.empty())
5293 PrintOut(LOG_CRIT,"In the system's table of devices NO devices found to scan\n");
5294 }
5295 else
5296 PrintOut(LOG_CRIT, "Configuration file %s parsed but has no entries\n", configfile);
5297
5298 return conf_entries.size();
5299 }
5300
5301 // Register one device, return false on error
register_device(dev_config & cfg,dev_state & state,smart_device_auto_ptr & dev,const dev_config_vector * prev_cfgs)5302 static bool register_device(dev_config & cfg, dev_state & state, smart_device_auto_ptr & dev,
5303 const dev_config_vector * prev_cfgs)
5304 {
5305 bool scanning;
5306 if (!dev) {
5307 // Get device of appropriate type
5308 dev = smi()->get_smart_device(cfg.name.c_str(), cfg.dev_type.c_str());
5309 if (!dev) {
5310 if (cfg.dev_type.empty())
5311 PrintOut(LOG_INFO, "Device: %s, unable to autodetect device type\n", cfg.name.c_str());
5312 else
5313 PrintOut(LOG_INFO, "Device: %s, unsupported device type '%s'\n", cfg.name.c_str(), cfg.dev_type.c_str());
5314 return false;
5315 }
5316 scanning = false;
5317 }
5318 else {
5319 // Use device from device scan
5320 scanning = true;
5321 }
5322
5323 // Save old info
5324 smart_device::device_info oldinfo = dev->get_info();
5325
5326 // Open with autodetect support, may return 'better' device
5327 dev.replace( dev->autodetect_open() );
5328
5329 // Report if type has changed
5330 if (oldinfo.dev_type != dev->get_dev_type())
5331 PrintOut(LOG_INFO, "Device: %s, type changed from '%s' to '%s'\n",
5332 cfg.name.c_str(), oldinfo.dev_type.c_str(), dev->get_dev_type());
5333
5334 // Return if autodetect_open() failed
5335 if (!dev->is_open()) {
5336 if (debugmode || !scanning)
5337 PrintOut(LOG_INFO, "Device: %s, open() failed: %s\n", dev->get_info_name(), dev->get_errmsg());
5338 return false;
5339 }
5340
5341 // Update informal name
5342 cfg.name = dev->get_info().info_name;
5343 PrintOut(LOG_INFO, "Device: %s, opened\n", cfg.name.c_str());
5344
5345 int status;
5346 const char * typemsg;
5347 // register ATA device
5348 if (dev->is_ata()){
5349 typemsg = "ATA";
5350 status = ATADeviceScan(cfg, state, dev->to_ata(), prev_cfgs);
5351 }
5352 // or register SCSI device
5353 else if (dev->is_scsi()){
5354 typemsg = "SCSI";
5355 status = SCSIDeviceScan(cfg, state, dev->to_scsi(), prev_cfgs);
5356 }
5357 // or register NVMe device
5358 else if (dev->is_nvme()) {
5359 typemsg = "NVMe";
5360 status = NVMeDeviceScan(cfg, state, dev->to_nvme(), prev_cfgs);
5361 }
5362 else {
5363 PrintOut(LOG_INFO, "Device: %s, neither ATA, SCSI nor NVMe device\n", cfg.name.c_str());
5364 return false;
5365 }
5366
5367 if (status) {
5368 if (!scanning || debugmode) {
5369 if (cfg.lineno)
5370 PrintOut(scanning ? LOG_INFO : LOG_CRIT,
5371 "Unable to register %s device %s at line %d of file %s\n",
5372 typemsg, cfg.name.c_str(), cfg.lineno, configfile);
5373 else
5374 PrintOut(LOG_INFO, "Unable to register %s device %s\n",
5375 typemsg, cfg.name.c_str());
5376 }
5377
5378 return false;
5379 }
5380
5381 return true;
5382 }
5383
5384 // This function tries devices from conf_entries. Each one that can be
5385 // registered is moved onto the [ata|scsi]devices lists and removed
5386 // from the conf_entries list.
register_devices(const dev_config_vector & conf_entries,smart_device_list & scanned_devs,dev_config_vector & configs,dev_state_vector & states,smart_device_list & devices)5387 static bool register_devices(const dev_config_vector & conf_entries, smart_device_list & scanned_devs,
5388 dev_config_vector & configs, dev_state_vector & states, smart_device_list & devices)
5389 {
5390 // start by clearing lists/memory of ALL existing devices
5391 configs.clear();
5392 devices.clear();
5393 states.clear();
5394
5395 // Map of already seen non-DEVICESCAN devices (unique_name -> cfg.name)
5396 typedef std::map<std::string, std::string> prev_unique_names_map;
5397 prev_unique_names_map prev_unique_names;
5398
5399 // Register entries
5400 for (unsigned i = 0; i < conf_entries.size(); i++) {
5401 dev_config cfg = conf_entries[i];
5402
5403 // Get unique device "name [type]" (with symlinks resolved) for duplicate detection
5404 std::string unique_name = smi()->get_unique_dev_name(cfg.dev_name.c_str(), cfg.dev_type.c_str());
5405 if (debugmode && unique_name != cfg.dev_name) {
5406 pout("Device: %s%s%s%s, unique name: %s\n", cfg.name.c_str(),
5407 (!cfg.dev_type.empty() ? " [" : ""), cfg.dev_type.c_str(),
5408 (!cfg.dev_type.empty() ? "]" : ""), unique_name.c_str());
5409 }
5410
5411 if (cfg.ignore) {
5412 // Store for duplicate detection and ignore
5413 PrintOut(LOG_INFO, "Device: %s%s%s%s, ignored\n", cfg.name.c_str(),
5414 (!cfg.dev_type.empty() ? " [" : ""), cfg.dev_type.c_str(),
5415 (!cfg.dev_type.empty() ? "]" : ""));
5416 prev_unique_names[unique_name] = cfg.name;
5417 continue;
5418 }
5419
5420 smart_device_auto_ptr dev;
5421
5422 // Device may already be detected during devicescan
5423 bool scanning = false;
5424 if (i < scanned_devs.size()) {
5425 dev = scanned_devs.release(i);
5426 if (dev) {
5427 // Check for a preceding non-DEVICESCAN entry for the same device
5428 prev_unique_names_map::iterator ui = prev_unique_names.find(unique_name);
5429 if (ui != prev_unique_names.end()) {
5430 bool ne = (ui->second != cfg.name);
5431 PrintOut(LOG_INFO, "Device: %s, %s%s, ignored\n", dev->get_info_name(),
5432 (ne ? "same as " : "duplicate"), (ne ? ui->second.c_str() : ""));
5433 continue;
5434 }
5435 scanning = true;
5436 }
5437 }
5438
5439 // Register device
5440 // If scanning, pass dev_idinfo of previous devices for duplicate check
5441 dev_state state;
5442 if (!register_device(cfg, state, dev, (scanning ? &configs : 0))) {
5443 // if device is explicitly listed and we can't register it, then
5444 // exit unless the user has specified that the device is removable
5445 if (!scanning) {
5446 if (!(cfg.removable || quit == QUIT_NEVER)) {
5447 PrintOut(LOG_CRIT, "Unable to register device %s (no Directive -d removable). Exiting.\n",
5448 cfg.name.c_str());
5449 return false;
5450 }
5451 PrintOut(LOG_INFO, "Device: %s, not available\n", cfg.name.c_str());
5452 // Prevent retry of registration
5453 prev_unique_names[unique_name] = cfg.name;
5454 }
5455 continue;
5456 }
5457
5458 // move onto the list of devices
5459 configs.push_back(cfg);
5460 states.push_back(state);
5461 devices.push_back(dev);
5462 if (!scanning)
5463 // Store for duplicate detection
5464 prev_unique_names[unique_name] = cfg.name;
5465 }
5466
5467 // Set factors for staggered tests
5468 for (unsigned i = 0, factor = 0; i < configs.size(); i++) {
5469 dev_config & cfg = configs[i];
5470 if (cfg.test_regex.empty())
5471 continue;
5472 cfg.test_offset_factor = factor++;
5473 }
5474
5475 init_disable_standby_check(configs);
5476 return true;
5477 }
5478
5479
5480 // Main program without exception handling
main_worker(int argc,char ** argv)5481 static int main_worker(int argc, char **argv)
5482 {
5483 // Initialize interface
5484 smart_interface::init();
5485 if (!smi())
5486 return 1;
5487
5488 // Check whether systemd notify is supported and enabled
5489 notify_init();
5490
5491 // parse input and print header and usage info if needed
5492 int status = parse_options(argc,argv);
5493 if (status >= 0)
5494 return status;
5495
5496 // Configuration for each device
5497 dev_config_vector configs;
5498 // Device states
5499 dev_state_vector states;
5500 // Devices to monitor
5501 smart_device_list devices;
5502
5503 // Drop capabilities if supported and enabled
5504 capabilities_drop_now();
5505
5506 notify_msg("Initializing ...");
5507
5508 // the main loop of the code
5509 bool firstpass = true, write_states_always = true;
5510 time_t wakeuptime = 0;
5511 // assert(status < 0);
5512 do {
5513 // Should we (re)read the config file?
5514 if (firstpass || caughtsigHUP){
5515 if (!firstpass) {
5516 // Write state files
5517 if (!state_path_prefix.empty())
5518 write_all_dev_states(configs, states);
5519
5520 PrintOut(LOG_INFO,
5521 caughtsigHUP==1?
5522 "Signal HUP - rereading configuration file %s\n":
5523 "\a\nSignal INT - rereading configuration file %s (" SIGQUIT_KEYNAME " quits)\n\n",
5524 configfile);
5525 notify_msg("Reloading ...");
5526 }
5527
5528 {
5529 dev_config_vector conf_entries; // Entries read from smartd.conf
5530 smart_device_list scanned_devs; // Devices found during scan
5531 // (re)reads config file, makes >=0 entries
5532 int entries = ReadOrMakeConfigEntries(conf_entries, scanned_devs);
5533
5534 if (entries>=0) {
5535 // checks devices, then moves onto ata/scsi list or deallocates.
5536 if (!register_devices(conf_entries, scanned_devs, configs, states, devices)) {
5537 status = EXIT_BADDEV;
5538 break;
5539 }
5540 if (!(configs.size() == devices.size() && configs.size() == states.size()))
5541 throw std::logic_error("Invalid result from RegisterDevices");
5542 // Handle limitations if capabilities are dropped
5543 capabilities_check_config(configs);
5544 }
5545 else if ( quit == QUIT_NEVER
5546 || ((quit == QUIT_NODEV || quit == QUIT_NODEVSTARTUP) && !firstpass)) {
5547 // user has asked to continue on error in configuration file
5548 if (!firstpass)
5549 PrintOut(LOG_INFO,"Reusing previous configuration\n");
5550 }
5551 else {
5552 // exit with configuration file error status
5553 status = (entries == -3 ? EXIT_READCONF : entries == -2 ? EXIT_NOCONF : EXIT_BADCONF);
5554 break;
5555 }
5556 }
5557
5558 if (!( devices.size() > 0 || quit == QUIT_NEVER
5559 || (quit == QUIT_NODEVSTARTUP && !firstpass))) {
5560 PrintOut(LOG_INFO, "Unable to monitor any SMART enabled devices. %sExiting...\n",
5561 (!debugmode ? "Try debug (-d) option. " : ""));
5562 status = EXIT_NODEV;
5563 break;
5564 }
5565
5566 // Log number of devices we are monitoring...
5567 int numata = 0, numscsi = 0;
5568 for (unsigned i = 0; i < devices.size(); i++) {
5569 const smart_device * dev = devices.at(i);
5570 if (dev->is_ata())
5571 numata++;
5572 else if (dev->is_scsi())
5573 numscsi++;
5574 }
5575 PrintOut(LOG_INFO, "Monitoring %d ATA/SATA, %d SCSI/SAS and %d NVMe devices\n",
5576 numata, numscsi, (int)devices.size() - numata - numscsi);
5577
5578 if (quit == QUIT_SHOWTESTS) {
5579 // user has asked to print test schedule
5580 PrintTestSchedule(configs, states, devices);
5581 // assert(firstpass);
5582 return 0;
5583 }
5584
5585 // reset signal
5586 caughtsigHUP=0;
5587
5588 // Always write state files after (re)configuration
5589 write_states_always = true;
5590 }
5591
5592 // check all devices once,
5593 // self tests are not started in first pass unless '-q onecheck' is specified
5594 notify_check((int)devices.size());
5595 CheckDevicesOnce(configs, states, devices, firstpass, (!firstpass || quit == QUIT_ONECHECK));
5596
5597 // Write state files
5598 if (!state_path_prefix.empty())
5599 write_all_dev_states(configs, states, write_states_always);
5600 write_states_always = false;
5601
5602 // Write attribute logs
5603 if (!attrlog_path_prefix.empty())
5604 write_all_dev_attrlogs(configs, states);
5605
5606 // user has asked us to exit after first check
5607 if (quit == QUIT_ONECHECK) {
5608 PrintOut(LOG_INFO,"Started with '-q onecheck' option. All devices successfully checked once.\n"
5609 "smartd is exiting (exit status 0)\n");
5610 // assert(firstpass);
5611 return 0;
5612 }
5613
5614 if (firstpass) {
5615 if (!debugmode) {
5616 // fork() into background if needed, close ALL file descriptors,
5617 // redirect stdin, stdout, and stderr, chdir to "/".
5618 status = daemon_init();
5619 if (status >= 0)
5620 return status;
5621
5622 // Write PID file if configured
5623 if (!write_pid_file())
5624 return EXIT_PID;
5625 }
5626
5627 // Set exit and signal handlers
5628 install_signal_handlers();
5629
5630 // Initialize wakeup time to CURRENT time
5631 wakeuptime = time(0);
5632
5633 firstpass = false;
5634 }
5635
5636 // sleep until next check time, or a signal arrives
5637 wakeuptime = dosleep(wakeuptime, write_states_always, (int)devices.size());
5638
5639 } while (!caughtsigEXIT);
5640
5641 if (caughtsigEXIT && status < 0) {
5642 // Loop exited on signal
5643 if (caughtsigEXIT == SIGTERM || (debugmode && caughtsigEXIT == SIGQUIT)) {
5644 PrintOut(LOG_INFO, "smartd received signal %d: %s\n",
5645 caughtsigEXIT, strsignal(caughtsigEXIT));
5646 }
5647 else {
5648 // Unexpected SIGINT or SIGQUIT
5649 PrintOut(LOG_CRIT, "smartd received unexpected signal %d: %s\n",
5650 caughtsigEXIT, strsignal(caughtsigEXIT));
5651 status = EXIT_SIGNAL;
5652 }
5653 }
5654
5655 // Status unset above implies success
5656 if (status < 0)
5657 status = 0;
5658
5659 if (!firstpass) {
5660 // Loop exited after daemon_init() and write_pid_file()
5661
5662 // Write state files only on normal exit
5663 if (!status && !state_path_prefix.empty())
5664 write_all_dev_states(configs, states);
5665
5666 // Delete PID file, if one was created
5667 if (!pid_file.empty() && unlink(pid_file.c_str()))
5668 PrintOut(LOG_CRIT,"Can't unlink PID file %s (%s).\n",
5669 pid_file.c_str(), strerror(errno));
5670
5671 // and this should be the final output from smartd before it exits
5672 PrintOut((status ? LOG_CRIT : LOG_INFO), "smartd is exiting (exit status %d)\n",
5673 status);
5674 }
5675
5676 return status;
5677 }
5678
5679
5680 #ifndef _WIN32
5681 // Main program
main(int argc,char ** argv)5682 int main(int argc, char **argv)
5683 #else
5684 // Windows: internal main function started direct or by service control manager
5685 static int smartd_main(int argc, char **argv)
5686 #endif
5687 {
5688 int status;
5689 try {
5690 // Do the real work ...
5691 status = main_worker(argc, argv);
5692 }
5693 catch (const std::bad_alloc & /*ex*/) {
5694 // Memory allocation failed (also thrown by std::operator new)
5695 PrintOut(LOG_CRIT, "Smartd: Out of memory\n");
5696 status = EXIT_NOMEM;
5697 }
5698 catch (const std::exception & ex) {
5699 // Other fatal errors
5700 PrintOut(LOG_CRIT, "Smartd: Exception: %s\n", ex.what());
5701 status = EXIT_BADCODE;
5702 }
5703
5704 // Check for remaining device objects
5705 if (smart_device::get_num_objects() != 0) {
5706 PrintOut(LOG_CRIT, "Smartd: Internal Error: %d device object(s) left at exit.\n",
5707 smart_device::get_num_objects());
5708 status = EXIT_BADCODE;
5709 }
5710
5711 if (status == EXIT_BADCODE)
5712 PrintOut(LOG_CRIT, "Please inform " PACKAGE_BUGREPORT ", including output of smartd -V.\n");
5713
5714 notify_exit(status);
5715 #ifdef _WIN32
5716 daemon_winsvc_exitcode = status;
5717 #endif
5718 return status;
5719 }
5720
5721
#ifdef _WIN32
// Main function for Windows
int main(int argc, char **argv)
{
  // How smartd presents itself as a windows service
  static const daemon_winsvc_options service_options = {
    // cmd_opt
    "--service",
    // servicename, displayname
    "smartd", "SmartD Service",
    // description
    "Controls and monitors storage devices using the Self-Monitoring, "
    "Analysis and Reporting Technology System (SMART) built into "
    "ATA/SATA and SCSI/SAS hard drives and solid-state drives. "
    "www.smartmontools.org"
  };

  // daemon_main() handles daemon and service specific commands
  // and starts smartd_main() direct, from a new process,
  // or via service control manager
  return daemon_main("smartd", &service_options, smartd_main, argc, argv);
}
#endif
5741