1 /*
2  * Home page of code is: https://www.smartmontools.org
3  *
4  * Copyright (C) 2002-11 Bruce Allen
5  * Copyright (C) 2008-20 Christian Franke
6  * Copyright (C) 2000    Michael Cornwell <cornwell@acm.org>
7  * Copyright (C) 2008    Oliver Bock <brevilo@users.sourceforge.net>
8  *
9  * SPDX-License-Identifier: GPL-2.0-or-later
10  */
11 
12 #include "config.h"
13 #define __STDC_FORMAT_MACROS 1 // enable PRI* for C++
14 
15 // unconditionally included files
16 #include <inttypes.h>
17 #include <stdio.h>
18 #include <sys/types.h>
19 #include <sys/stat.h>   // umask
20 #include <signal.h>
21 #include <fcntl.h>
22 #include <string.h>
23 #include <syslog.h>
24 #include <stdarg.h>
25 #include <stdlib.h>
26 #include <errno.h>
27 #include <time.h>
28 #include <limits.h>
29 #include <getopt.h>
30 
31 #include <algorithm> // std::replace()
32 #include <map>
33 #include <stdexcept>
34 #include <string>
35 #include <vector>
36 
37 // conditionally included files
38 #ifndef _WIN32
39 #include <sys/wait.h>
40 #endif
41 #ifdef HAVE_UNISTD_H
42 #include <unistd.h>
43 #endif
44 
45 #ifdef _WIN32
46 #include "os_win32/popen.h" // popen/pclose()
47 #ifdef _MSC_VER
48 #pragma warning(disable:4761) // "conversion supplied"
49 typedef unsigned short mode_t;
50 typedef int pid_t;
51 #endif
52 #include <io.h> // umask()
53 #include <process.h> // getpid()
54 #endif // _WIN32
55 
56 #ifdef __CYGWIN__
57 #include <io.h> // setmode()
58 #endif // __CYGWIN__
59 
60 #ifdef HAVE_LIBCAP_NG
61 #include <cap-ng.h>
62 #endif // LIBCAP_NG
63 
64 #ifdef HAVE_LIBSYSTEMD
65 #include <systemd/sd-daemon.h>
66 #endif // HAVE_LIBSYSTEMD
67 
68 // locally included files
69 #include "atacmds.h"
70 #include "dev_interface.h"
71 #include "knowndrives.h"
72 #include "scsicmds.h"
73 #include "nvmecmds.h"
74 #include "utility.h"
75 
76 #ifdef _WIN32
77 // fork()/signal()/initd simulation for native Windows
78 #include "os_win32/daemon_win32.h" // daemon_main/detach/signal()
79 #define strsignal daemon_strsignal
80 #define sleep     daemon_sleep
81 // SIGQUIT does not exist, CONTROL-Break signals SIGBREAK.
82 #define SIGQUIT SIGBREAK
83 #define SIGQUIT_KEYNAME "CONTROL-Break"
84 #else  // _WIN32
85 #define SIGQUIT_KEYNAME "CONTROL-\\"
86 #endif // _WIN32
87 
// Version identification string of this source file.  CONFIG_H_CVSID is
// appended by adjacent string literal concatenation (it is not defined in
// this part of the file; presumably it comes from config.h).
const char * smartd_cpp_cvsid = "$Id: smartd.cpp 5118 2020-11-23 18:25:16Z chrfranke $"
  CONFIG_H_CVSID;
90 
// Pointer to a signal handler function taking the signal number.
// The typedef is declared inside extern "C" so that the function pointer
// type has C language linkage.
extern "C" {
  typedef void (*signal_handler_type)(int);
}
94 
set_signal_if_not_ignored(int sig,signal_handler_type handler)95 static void set_signal_if_not_ignored(int sig, signal_handler_type handler)
96 {
97 #if defined(_WIN32)
98   // signal() emulation
99   daemon_signal(sig, handler);
100 
101 #elif defined(HAVE_SIGACTION)
102   // SVr4, POSIX.1-2001, POSIX.1-2008
103   struct sigaction sa;
104   sa.sa_handler = SIG_DFL;
105   sigaction(sig, (struct sigaction *)0, &sa);
106   if (sa.sa_handler == SIG_IGN)
107     return;
108 
109   memset(&sa, 0, sizeof(sa));
110   sa.sa_handler = handler;
111   sa.sa_flags = SA_RESTART; // BSD signal() semantics
112   sigaction(sig, &sa, (struct sigaction *)0);
113 
114 #elif defined(HAVE_SIGSET)
115   // SVr4, POSIX.1-2001, obsoleted in POSIX.1-2008
116   if (sigset(sig, handler) == SIG_IGN)
117     sigset(sig, SIG_IGN);
118 
119 #else
120   // POSIX.1-2001, POSIX.1-2008, C89, C99, undefined semantics.
121   // Important: BSD semantics is required.  Traditional signal()
122   // resets the handler to SIG_DFL after the first signal is caught.
123   if (signal(sig, handler) == SIG_IGN)
124     signal(sig, SIG_IGN);
125 #endif
126 }
127 
128 using namespace smartmontools;
129 
// smartd exit codes
// Values are grouped: 1-6 command line / config file / startup problems,
// 8 and 10 internal problems, 16-17 device problems, 254 signal.
#define EXIT_BADCMD    1   // command line did not parse
#define EXIT_BADCONF   2   // syntax error in config file
#define EXIT_STARTUP   3   // problem forking daemon
#define EXIT_PID       4   // problem creating pid file
#define EXIT_NOCONF    5   // config file does not exist
#define EXIT_READCONF  6   // config file exists but cannot be read

#define EXIT_NOMEM     8   // out of memory
#define EXIT_BADCODE   10  // internal error - should NEVER happen

#define EXIT_BADDEV    16  // we can't monitor this device
#define EXIT_NODEV     17  // no devices to monitor

#define EXIT_SIGNAL    254 // abort on signal
145 
146 
// command-line: 1=debug mode, 2=print presets
static unsigned char debugmode = 0;

// command-line: how long to sleep between checks
// (default CHECKTIME; unit is presumably seconds - TODO confirm)
#define CHECKTIME 1800
static int checktime=CHECKTIME;

// command-line: name of PID file (empty for no pid file)
static std::string pid_file;

// command-line: path prefix of persistent state file, empty if no persistence.
// The build-time default SMARTMONTOOLS_SAVESTATES is used if it is defined.
static std::string state_path_prefix
#ifdef SMARTMONTOOLS_SAVESTATES
          = SMARTMONTOOLS_SAVESTATES
#endif
                                    ;

// command-line: path prefix of attribute log file, empty if no logs.
// The build-time default SMARTMONTOOLS_ATTRIBUTELOG is used if it is defined.
static std::string attrlog_path_prefix
#ifdef SMARTMONTOOLS_ATTRIBUTELOG
          = SMARTMONTOOLS_ATTRIBUTELOG
#endif
                                    ;

// configuration file name
static const char * configfile;
// configuration file "name" if read from stdin
static const char * const configfile_stdin = "<stdin>";
// path of alternate configuration file
static std::string configfile_alt;

// warning script file
static std::string warning_script;

// command-line: when should we exit?
// (the default is QUIT_NODEV)
enum quit_t {
  QUIT_NODEV, QUIT_NODEVSTARTUP, QUIT_NEVER, QUIT_ONECHECK,
  QUIT_SHOWTESTS, QUIT_ERRORS
};
static quit_t quit = QUIT_NODEV;

// command-line; this is the default syslog(3) log facility to use.
static int facility=LOG_DAEMON;

#ifndef _WIN32
// command-line: fork into background?
static bool do_fork=true;
#endif

// TODO: This smartctl only variable is also used in some os_*.cpp
// (this is why it has external linkage, unlike the other variables here)
unsigned char failuretest_permissive = 0;

// The flags below are declared volatile; presumably they are written by
// the signal handlers and polled elsewhere in this file - TODO confirm.

// set to one if we catch a USR1 (check devices now)
static volatile int caughtsigUSR1=0;

#ifdef _WIN32
// set to one if we catch a USR2 (toggle debug mode)
static volatile int caughtsigUSR2=0;
#endif

// set to one if we catch a HUP (reload config file). In debug mode,
// set to two, if we catch INT (also reload config file).
static volatile int caughtsigHUP=0;

// set to signal value if we catch INT, QUIT, or TERM
static volatile int caughtsigEXIT=0;
213 
// This function prints either to stdout or to the syslog as needed.
// 'priority' is a syslog(3) priority (callers in this file pass e.g.
// LOG_CRIT); 'fmt' and the following arguments are printf-style.
// Forward declaration only, the definition is not in this part of the file.
static void PrintOut(int priority, const char *fmt, ...)
                     __attribute_format_printf(2, 3);
217 
218 #ifdef HAVE_LIBSYSTEMD
// systemd notify support

// Set to true by notify_init() if NOTIFY_SOCKET is set in the environment.
// All other notify_*() functions do nothing while this is false.
static bool notify_enabled = false;
222 
notify_init()223 static inline void notify_init()
224 {
225   if (!getenv("NOTIFY_SOCKET"))
226     return;
227   notify_enabled = true;
228 }
229 
notify_post_init()230 static inline bool notify_post_init()
231 {
232   if (!notify_enabled)
233     return true;
234   if (do_fork) {
235     PrintOut(LOG_CRIT, "Option -n (--no-fork) is required if 'Type=notify' is set.\n");
236     return false;
237   }
238   return true;
239 }
240 
notify_msg(const char * msg,bool ready=false)241 static void notify_msg(const char * msg, bool ready = false)
242 {
243   if (!notify_enabled)
244     return;
245   if (debugmode) {
246     pout("sd_notify(0, \"%sSTATUS=%s\")\n", (ready ? "READY=1\\n" : ""), msg);
247     return;
248   }
249   sd_notifyf(0, "%sSTATUS=%s", (ready ? "READY=1\n" : ""), msg);
250 }
251 
notify_check(int numdev)252 static void notify_check(int numdev)
253 {
254   if (!notify_enabled)
255     return;
256   char msg[32];
257   snprintf(msg, sizeof(msg), "Checking %d device%s ...",
258            numdev, (numdev != 1 ? "s" : ""));
259   notify_msg(msg);
260 }
261 
notify_wait(time_t wakeuptime,int numdev)262 static void notify_wait(time_t wakeuptime, int numdev)
263 {
264   if (!notify_enabled)
265     return;
266   char ts[16] = ""; struct tm tmbuf;
267   strftime(ts, sizeof(ts), "%H:%M:%S", time_to_tm_local(&tmbuf, wakeuptime));
268   char msg[64];
269   snprintf(msg, sizeof(msg), "Next check of %d device%s will start at %s",
270            numdev, (numdev != 1 ? "s" : ""), ts);
271   static bool ready = true; // first call notifies READY=1
272   notify_msg(msg, ready);
273   ready = false;
274 }
275 
notify_exit(int status)276 static void notify_exit(int status)
277 {
278   if (!notify_enabled)
279     return;
280   const char * msg;
281   switch (status) {
282     case 0:             msg = "Exiting ..."; break;
283     case EXIT_BADCMD:   msg = "Error in command line (see SYSLOG)"; break;
284     case EXIT_BADCONF: case EXIT_NOCONF:
285     case EXIT_READCONF: msg = "Error in config file (see SYSLOG)"; break;
286     case EXIT_BADDEV:   msg = "Unable to register a device (see SYSLOG)"; break;
287     case EXIT_NODEV:    msg = "No devices to monitor"; break;
288     default:            msg = "Error (see SYSLOG)"; break;
289   }
290   notify_msg(msg);
291 }
292 
293 #else // HAVE_LIBSYSTEMD
294 // No systemd notify support
295 
notify_post_init()296 static inline bool notify_post_init()
297 {
298 #ifdef __linux__
299   if (getenv("NOTIFY_SOCKET")) {
300     PrintOut(LOG_CRIT, "This version of smartd was build without 'Type=notify' support.\n");
301     return false;
302   }
303 #endif
304   return true;
305 }
306 
// Empty replacements used if smartd is built without HAVE_LIBSYSTEMD.
// They keep callers free of #ifdefs.  Note that this notify_msg() takes
// the message only, it has no 'ready' parameter.
static inline void notify_init() { }
static inline void notify_msg(const char *) { }
static inline void notify_check(int) { }
static inline void notify_wait(time_t, int) { }
static inline void notify_exit(int) { }
312 
313 #endif // HAVE_LIBSYSTEMD
314 
// Attribute monitoring flags.
// See monitor_attr_flags below.
// Each value is a distinct single bit, so several flags can be combined
// in the one unsigned char that attribute_flags stores per attribute.
enum {
  MONITOR_IGN_FAILUSE = 0x01,
  MONITOR_IGNORE      = 0x02,
  MONITOR_RAW_PRINT   = 0x04,
  MONITOR_RAW         = 0x08,
  MONITOR_AS_CRIT     = 0x10,
  MONITOR_RAW_AS_CRIT = 0x20,
};
325 
// Array of flags for each attribute.
// Holds one byte of flag bits for each attribute ID 1..255.  ID 0 and all
// IDs outside of the table are silently rejected: set() ignores them and
// is_set() returns false.
class attribute_flags
{
public:
  // Start with no flag set for any ID.
  attribute_flags()
    {
      for (unsigned i = 0; i < sizeof(m_flags); i++)
        m_flags[i] = 0;
    }

  // Return true if at least one of the bits in 'flag' is set for 'id'.
  bool is_set(int id, unsigned char flag) const
    {
      if (!valid_id(id))
        return false;
      return ((m_flags[id] & flag) != 0);
    }

  // Add the bits in 'flags' to the flags of 'id'.  Bits are never cleared.
  void set(int id, unsigned char flags)
    {
      if (!valid_id(id))
        return;
      m_flags[id] |= flags;
    }

private:
  // True if 'id' is a usable table index (0 is excluded).
  bool valid_id(int id) const
    { return (1 <= id && id < (int)sizeof(m_flags)); }

  unsigned char m_flags[256];
};
345 
346 
/// Configuration data for a device. Read from smartd.conf.
/// Supports copy & assignment and is compatible with STL containers.
/// The constructor clears all flags and numeric settings; the class type
/// members are default-constructed.
struct dev_config
{
  int lineno;                             // Line number of entry in file
  std::string name;                       // Device name (with optional extra info)
  std::string dev_name;                   // Device name (plain, for SMARTD_DEVICE variable)
  std::string dev_type;                   // Device type argument from -d directive, empty if none
  std::string dev_idinfo;                 // Device identify info for warning emails
  std::string state_file;                 // Path of the persistent state file, empty if none
  std::string attrlog_file;               // Path of the persistent attrlog file, empty if none
  bool ignore;                            // Ignore this entry
  bool id_is_unique;                      // True if dev_idinfo is unique (includes S/N or WWN)
  bool smartcheck;                        // Check SMART status
  bool usagefailed;                       // Check for failed Usage Attributes
  bool prefail;                           // Track changes in Prefail Attributes
  bool usage;                             // Track changes in Usage Attributes
  bool selftest;                          // Monitor number of selftest errors
  bool errorlog;                          // Monitor number of ATA errors
  bool xerrorlog;                         // Monitor number of ATA errors (Extended Comprehensive error log)
  bool offlinests;                        // Monitor changes in offline data collection status
  bool offlinests_ns;                     // Disable auto standby if offline data collection is in progress
  bool selfteststs;                       // Monitor changes in self-test execution status
  bool selfteststs_ns;                    // Disable auto standby if self-test is in progress
  bool permissive;                        // Ignore failed SMART commands
  char autosave;                          // 1=disable, 2=enable Autosave Attributes (0 after construction)
  char autoofflinetest;                   // 1=disable, 2=enable Auto Offline Test (0 after construction)
  firmwarebug_defs firmwarebugs;          // -F directives from drivedb or smartd.conf
  bool ignorepresets;                     // Ignore database of -v options
  bool showpresets;                       // Show database entry for this device
  bool removable;                         // Device may disappear (not be present)
  char powermode;                         // Skip check, if disk in idle or standby mode
  bool powerquiet;                        // Skip powermode 'skipping checks' message
  int powerskipmax;                       // How many times a check can be skipped
  unsigned char tempdiff;                 // Track Temperature changes >= this limit
  unsigned char tempinfo, tempcrit;       // Track Temperatures >= these limits as LOG_INFO, LOG_CRIT+mail
  regular_expression test_regex;          // Regex for scheduled testing
  unsigned test_offset_factor;            // Factor for staggering of scheduled tests

  // Configuration of email warning messages
  std::string emailcmdline;               // Script to execute, empty if no messages
  std::string emailaddress;               // Email address, or empty
  unsigned char emailfreq;                // Emails once (1) daily (2) diminishing (3)
  bool emailtest;                         // Send test email?

  // ATA ONLY
  // All set_* values are 0 after construction, which is none of the
  // disable/enable values listed below.
  int dev_rpm; // rotation rate, 0 = unknown, 1 = SSD, >1 = HDD
  int set_aam; // disable(-1), enable(1..255->0..254) Automatic Acoustic Management
  int set_apm; // disable(-1), enable(2..255->1..254) Advanced Power Management
  int set_lookahead; // disable(-1), enable(1) read look-ahead
  int set_standby; // set(1..255->0..254) standby timer
  bool set_security_freeze; // Freeze ATA security
  int set_wcache; // disable(-1), enable(1) write cache
  int set_dsn; // disable(0x2), enable(0x1) DSN

  bool sct_erc_set;                       // set SCT ERC to:
  unsigned short sct_erc_readtime;        // ERC read time (deciseconds)
  unsigned short sct_erc_writetime;       // ERC write time (deciseconds)

  unsigned char curr_pending_id;          // ID of current pending sector count, 0 if none
  unsigned char offl_pending_id;          // ID of offline uncorrectable sector count, 0 if none
  bool curr_pending_incr, offl_pending_incr; // True if current/offline pending values increase
  bool curr_pending_set,  offl_pending_set;  // True if '-C', '-U' set in smartd.conf

  attribute_flags monitor_attr_flags;     // MONITOR_* flags for each attribute

  ata_vendor_attr_defs attribute_defs;    // -v options

  dev_config();
};
417 
// Create an empty configuration entry: all flags are false and all numeric
// settings are 0.  Members of class type (strings, firmwarebugs, test_regex,
// monitor_attr_flags, attribute_defs) are not listed and therefore
// default-constructed.
// The initializers are listed in member declaration order.
dev_config::dev_config()
: lineno(0),
  ignore(false),
  id_is_unique(false),
  smartcheck(false),
  usagefailed(false),
  prefail(false),
  usage(false),
  selftest(false),
  errorlog(false),
  xerrorlog(false),
  offlinests(false),  offlinests_ns(false),
  selfteststs(false), selfteststs_ns(false),
  permissive(false),
  autosave(0),
  autoofflinetest(0),
  ignorepresets(false),
  showpresets(false),
  removable(false),
  powermode(0),
  powerquiet(false),
  powerskipmax(0),
  tempdiff(0),
  tempinfo(0), tempcrit(0),
  test_offset_factor(0),
  emailfreq(0),
  emailtest(false),
  dev_rpm(0),
  set_aam(0), set_apm(0),
  set_lookahead(0),
  set_standby(0),
  set_security_freeze(false),
  set_wcache(0), set_dsn(0),
  sct_erc_set(false),
  sct_erc_readtime(0), sct_erc_writetime(0),
  curr_pending_id(0), offl_pending_id(0),
  curr_pending_incr(false), offl_pending_incr(false),
  curr_pending_set(false),  offl_pending_set(false)
{
}
458 
459 
// Number of allowed mail message types
// (valid mail type indexes are 0 .. SMARTD_NMAIL-1)
static const int SMARTD_NMAIL = 13;
// Type for '-M test' mails (state not persistent):
// entries of this type are neither read from nor written to the state file.
static const int MAILTYPE_TEST = 0;
// TODO: Add const or enum for all mail types.
465 
// Bookkeeping of the warning mails sent for one mail message type.
// A default constructed object means: nothing sent yet.
struct mailinfo {
  int logged;// number of times an email has been sent
  time_t firstsent;// time first email was sent, as defined by time(2)
  time_t lastsent; // time last email was sent, as defined by time(2)

  // Initialize counter and both timestamps to 0
  mailinfo()
    : logged(0), firstsent(0), lastsent(0) { }
};
474 
/// Persistent state data for a device.
/// All members are zero after construction.
struct persistent_dev_state
{
  unsigned char tempmin, tempmax;         // Min/Max Temperatures

  unsigned char selflogcount;             // total number of self-test errors
  unsigned short selfloghour;             // lifetime hours of last self-test error

  time_t scheduled_test_next_check;       // Time of next check for scheduled self-tests

  uint64_t selective_test_last_start;     // Start LBA of last scheduled selective self-test
  uint64_t selective_test_last_end;       // End LBA of last scheduled selective self-test

  mailinfo maillog[SMARTD_NMAIL];         // log info on when mail sent, indexed by mail type

  // ATA ONLY
  int ataerrorcount;                      // Total number of ATA errors

  // Persistent part of ata_smart_values:
  // one entry per slot of the SMART attribute table, id 0 = unused slot.
  struct ata_attribute {
    unsigned char id;
    unsigned char val;
    unsigned char worst; // Byte needed for 'raw64' attribute only.
    uint64_t raw;        // the 6 raw bytes combined, byte 0 is least significant
    unsigned char resvd;

    ata_attribute() : id(0), val(0), worst(0), raw(0), resvd(0) { }
  };
  ata_attribute ata_attributes[NUMBER_ATA_SMART_ATTRIBUTES];

  // SCSI ONLY
  // Note: the state file code in this file has no entries for the SCSI
  // members below; write_dev_attrlog() reads scsi_error_counters.

  // Error counter page data, 'found' is nonzero if errCounter is valid.
  struct scsi_error_counter_t {
    struct scsiErrorCounter errCounter;
    unsigned char found;
    scsi_error_counter_t() : found(0)
      { memset(&errCounter, 0, sizeof(errCounter)); }
  };
  // write_dev_attrlog() labels index 0, 1, 2 as "read", "write", "verify"
  scsi_error_counter_t scsi_error_counters[3];

  struct scsi_nonmedium_error_t {
    struct scsiNonMediumError nme;
    unsigned char found;
    scsi_nonmedium_error_t() : found(0)
      { memset(&nme, 0, sizeof(nme)); }
  };
  scsi_nonmedium_error_t scsi_nonmedium_error;

  // NVMe only
  uint64_t nvme_err_log_entries;

  persistent_dev_state();
};
528 
// Set all scalar members to 0.  The array and struct members (maillog,
// ata_attributes, scsi_error_counters, scsi_nonmedium_error) are cleared
// by their own default constructors.
persistent_dev_state::persistent_dev_state()
: tempmin(0), tempmax(0),
  selflogcount(0),
  selfloghour(0),
  scheduled_test_next_check(0),
  selective_test_last_start(0),
  selective_test_last_end(0),
  ataerrorcount(0),
  nvme_err_log_entries(0)
{
}
540 
/// Non-persistent state data for a device.
/// All members are false / zero after construction.
struct temp_dev_state
{
  bool must_write;                        // true if persistent part should be written

  bool not_cap_offline;                   // true == not capable of offline testing
  bool not_cap_conveyance;                // (the following not_cap_* flags presumably
  bool not_cap_short;                     //  mark the respective self-test type as
  bool not_cap_long;                      //  not supported - TODO confirm)
  bool not_cap_selective;

  unsigned char temperature;              // last recorded Temperature (in Celsius)
  time_t tempmin_delay;                   // time where Min Temperature tracking will start

  bool removed;                           // true if open() failed for removable device

  bool powermodefail;                     // true if power mode check failed
  int powerskipcnt;                       // Number of checks skipped due to idle or standby mode
  int lastpowermodeskipped;               // the last power mode that was skipped

  bool attrlog_dirty;                     // true if persistent part has new attr values that
                                          // need to be written to attrlog

  // SCSI ONLY
  // (flags are stored as unsigned char, 0 means false)
  unsigned char SmartPageSupported;       // has log sense IE page (0x2f)
  unsigned char TempPageSupported;        // has log sense temperature page (0xd)
  unsigned char ReadECounterPageSupported;
  unsigned char WriteECounterPageSupported;
  unsigned char VerifyECounterPageSupported;
  unsigned char NonMediumErrorPageSupported;
  unsigned char SuppressReport;           // minimize nuisance reports
  unsigned char modese_len;               // mode sense/select cmd len: 0 (don't
                                          // know yet) 6 or 10
  // ATA ONLY
  uint64_t num_sectors;                   // Number of sectors
  ata_smart_values smartval;              // SMART data
  ata_smart_thresholds_pvt smartthres;    // SMART thresholds
  bool offline_started;                   // true if offline data collection was started
  bool selftest_started;                  // true if self-test was started

  temp_dev_state();
};
583 
temp_dev_state()584 temp_dev_state::temp_dev_state()
585 : must_write(false),
586   not_cap_offline(false),
587   not_cap_conveyance(false),
588   not_cap_short(false),
589   not_cap_long(false),
590   not_cap_selective(false),
591   temperature(0),
592   tempmin_delay(0),
593   removed(false),
594   powermodefail(false),
595   powerskipcnt(0),
596   lastpowermodeskipped(0),
597   attrlog_dirty(false),
598   SmartPageSupported(false),
599   TempPageSupported(false),
600   ReadECounterPageSupported(false),
601   WriteECounterPageSupported(false),
602   VerifyECounterPageSupported(false),
603   NonMediumErrorPageSupported(false),
604   SuppressReport(false),
605   modese_len(0),
606   num_sectors(0),
607   offline_started(false),
608   selftest_started(false)
609 {
610   memset(&smartval, 0, sizeof(smartval));
611   memset(&smartthres, 0, sizeof(smartthres));
612 }
613 
/// Runtime state data for a device.
/// Combines the persistent and the non-persistent part and provides the
/// conversion of the ATA attribute table between both parts.
struct dev_state
: public persistent_dev_state,
  public temp_dev_state
{
  // Copy ATA attributes from temp part (smartval) to persistent part
  void update_persistent_state();
  // Copy ATA attributes from persistent part to temp part (smartval)
  void update_temp_state();
};
622 
/// Container for configuration info for each device.
typedef std::vector<dev_config> dev_config_vector;

/// Container for state info for each device.
/// (presumably used in parallel with dev_config_vector, one entry per
/// device at the same index - TODO confirm against the callers)
typedef std::vector<dev_state> dev_state_vector;
628 
629 // Copy ATA attributes to persistent state.
update_persistent_state()630 void dev_state::update_persistent_state()
631 {
632   for (int i = 0; i < NUMBER_ATA_SMART_ATTRIBUTES; i++) {
633     const ata_smart_attribute & ta = smartval.vendor_attributes[i];
634     ata_attribute & pa = ata_attributes[i];
635     pa.id = ta.id;
636     if (ta.id == 0) {
637       pa.val = pa.worst = 0; pa.raw = 0;
638       continue;
639     }
640     pa.val = ta.current;
641     pa.worst = ta.worst;
642     pa.raw =            ta.raw[0]
643            | (          ta.raw[1] <<  8)
644            | (          ta.raw[2] << 16)
645            | ((uint64_t)ta.raw[3] << 24)
646            | ((uint64_t)ta.raw[4] << 32)
647            | ((uint64_t)ta.raw[5] << 40);
648     pa.resvd = ta.reserv;
649   }
650 }
651 
652 // Copy ATA from persistent to temp state.
update_temp_state()653 void dev_state::update_temp_state()
654 {
655   for (int i = 0; i < NUMBER_ATA_SMART_ATTRIBUTES; i++) {
656     const ata_attribute & pa = ata_attributes[i];
657     ata_smart_attribute & ta = smartval.vendor_attributes[i];
658     ta.id = pa.id;
659     if (pa.id == 0) {
660       ta.current = ta.worst = 0;
661       memset(ta.raw, 0, sizeof(ta.raw));
662       continue;
663     }
664     ta.current = pa.val;
665     ta.worst = pa.worst;
666     ta.raw[0] = (unsigned char) pa.raw;
667     ta.raw[1] = (unsigned char)(pa.raw >>  8);
668     ta.raw[2] = (unsigned char)(pa.raw >> 16);
669     ta.raw[3] = (unsigned char)(pa.raw >> 24);
670     ta.raw[4] = (unsigned char)(pa.raw >> 32);
671     ta.raw[5] = (unsigned char)(pa.raw >> 40);
672     ta.reserv = pa.resvd;
673   }
674 }
675 
// Parse a line from a state file.
// Accepts lines of the form '<key> = <decimal number>' where <key> is one
// of the names in the regex below, and stores the number in the matching
// member of 'state'.
// Returns false if the line does not match or if a mail type or attribute
// index is out of range.  Lines for mail type MAILTYPE_TEST are accepted
// (return true) but not stored.
// The number is converted with plain casts, so values which do not fit
// into the target member are truncated.
static bool parse_dev_state_line(const char * line, persistent_dev_state & state)
{
  // The comments on the right show the numbers of the capture groups
  // opened (and closed) on each line.  The code below depends on them.
  static const regular_expression regex(
    "^ *"
     "((temperature-min)" // (1 (2)
     "|(temperature-max)" // (3)
     "|(self-test-errors)" // (4)
     "|(self-test-last-err-hour)" // (5)
     "|(scheduled-test-next-check)" // (6)
     "|(selective-test-last-start)" // (7)
     "|(selective-test-last-end)" // (8)
     "|(ata-error-count)"  // (9)
     "|(mail\\.([0-9]+)\\." // (10 (11)
       "((count)" // (12 (13)
       "|(first-sent-time)" // (14)
       "|(last-sent-time)" // (15)
       ")" // 12)
      ")" // 10)
     "|(ata-smart-attribute\\.([0-9]+)\\." // (16 (17)
       "((id)" // (18 (19)
       "|(val)" // (20)
       "|(worst)" // (21)
       "|(raw)" // (22)
       "|(resvd)" // (23)
       ")" // 18)
      ")" // 16)
     "|(nvme-err-log-entries)" // (24)
     ")" // 1)
     " *= *([0-9]+)[ \n]*$" // (25)
  );

  // match[0] is the whole match, match[1..25] are the groups above
  const int nmatch = 1+25;
  regular_expression::match_range match[nmatch];
  if (!regex.execute(line, nmatch, match))
    return false;
  // The last group (25) is the value
  if (match[nmatch-1].rm_so < 0)
    return false;

  uint64_t val = strtoull(line + match[nmatch-1].rm_so, (char **)0, 10);

  // 'm' walks through the group numbers.  Each tested condition advances
  // it, even if the test fails, so the order of the branches must follow
  // the order of the groups in the regex.
  int m = 1;
  if (match[++m].rm_so >= 0) // m = 2
    state.tempmin = (unsigned char)val;
  else if (match[++m].rm_so >= 0) // m = 3
    state.tempmax = (unsigned char)val;
  else if (match[++m].rm_so >= 0) // m = 4
    state.selflogcount = (unsigned char)val;
  else if (match[++m].rm_so >= 0) // m = 5
    state.selfloghour = (unsigned short)val;
  else if (match[++m].rm_so >= 0) // m = 6
    state.scheduled_test_next_check = (time_t)val;
  else if (match[++m].rm_so >= 0) // m = 7
    state.selective_test_last_start = val;
  else if (match[++m].rm_so >= 0) // m = 8
    state.selective_test_last_end = val;
  else if (match[++m].rm_so >= 0) // m = 9
    state.ataerrorcount = (int)val;
  else if (match[m+=2].rm_so >= 0) { // m = 11: mail type index
    int i = atoi(line+match[m].rm_so);
    if (!(0 <= i && i < SMARTD_NMAIL))
      return false;
    if (i == MAILTYPE_TEST) // Don't suppress test mails
      return true;
    if (match[m+=2].rm_so >= 0) // m = 13: count
      state.maillog[i].logged = (int)val;
    else if (match[++m].rm_so >= 0) // m = 14: first-sent-time
      state.maillog[i].firstsent = (time_t)val;
    else if (match[++m].rm_so >= 0) // m = 15: last-sent-time
      state.maillog[i].lastsent = (time_t)val;
    else
      return false;
  }
  else if (match[m+=5+1].rm_so >= 0) { // m = 11+6 = 17: attribute slot index
    int i = atoi(line+match[m].rm_so);
    if (!(0 <= i && i < NUMBER_ATA_SMART_ATTRIBUTES))
      return false;
    if (match[m+=2].rm_so >= 0) // m = 19: id
      state.ata_attributes[i].id = (unsigned char)val;
    else if (match[++m].rm_so >= 0) // m = 20: val
      state.ata_attributes[i].val = (unsigned char)val;
    else if (match[++m].rm_so >= 0) // m = 21: worst
      state.ata_attributes[i].worst = (unsigned char)val;
    else if (match[++m].rm_so >= 0) // m = 22: raw
      state.ata_attributes[i].raw = val;
    else if (match[++m].rm_so >= 0) // m = 23: resvd
      state.ata_attributes[i].resvd = (unsigned char)val;
    else
      return false;
  }
  else if (match[m+7].rm_so >= 0) // m = 17, group 24: nvme-err-log-entries
    state.nvme_err_log_entries = val;
  else
    return false;
  return true;
}
772 
773 // Read a state file.
read_dev_state(const char * path,persistent_dev_state & state)774 static bool read_dev_state(const char * path, persistent_dev_state & state)
775 {
776   stdio_file f(path, "r");
777   if (!f) {
778     if (errno != ENOENT)
779       pout("Cannot read state file \"%s\"\n", path);
780     return false;
781   }
782 #ifdef __CYGWIN__
783   setmode(fileno(f), O_TEXT); // Allow files with \r\n
784 #endif
785 
786   persistent_dev_state new_state;
787   int good = 0, bad = 0;
788   char line[256];
789   while (fgets(line, sizeof(line), f)) {
790     const char * s = line + strspn(line, " \t");
791     if (!*s || *s == '#')
792       continue;
793     if (!parse_dev_state_line(line, new_state))
794       bad++;
795     else
796       good++;
797   }
798 
799   if (bad) {
800     if (!good) {
801       pout("%s: format error\n", path);
802       return false;
803     }
804     pout("%s: %d invalid line(s) ignored\n", path, bad);
805   }
806 
807   // This sets the values missing in the file to 0.
808   state = new_state;
809   return true;
810 }
811 
// Write the line 'name = val' to 'f'.
// Nothing is written if 'val' is 0, because values missing in a state
// file are read back as 0.
static void write_dev_state_line(FILE * f, const char * name, uint64_t val)
{
  if (val == 0)
    return;
  fprintf(f, "%s = %" PRIu64 "\n", name, val);
}
817 
// Write the line 'name1.id.name2 = val' to 'f' (used for indexed entries).
// Nothing is written if 'val' is 0, because values missing in a state
// file are read back as 0.
static void write_dev_state_line(FILE * f, const char * name1, int id, const char * name2, uint64_t val)
{
  if (val == 0)
    return;
  fprintf(f, "%s.%d.%s = %" PRIu64 "\n", name1, id, name2, val);
}
823 
824 // Write a state file
write_dev_state(const char * path,const persistent_dev_state & state)825 static bool write_dev_state(const char * path, const persistent_dev_state & state)
826 {
827   // Rename old "file" to "file~"
828   std::string pathbak = path; pathbak += '~';
829   unlink(pathbak.c_str());
830   rename(path, pathbak.c_str());
831 
832   stdio_file f(path, "w");
833   if (!f) {
834     pout("Cannot create state file \"%s\"\n", path);
835     return false;
836   }
837 
838   fprintf(f, "# smartd state file\n");
839   write_dev_state_line(f, "temperature-min", state.tempmin);
840   write_dev_state_line(f, "temperature-max", state.tempmax);
841   write_dev_state_line(f, "self-test-errors", state.selflogcount);
842   write_dev_state_line(f, "self-test-last-err-hour", state.selfloghour);
843   write_dev_state_line(f, "scheduled-test-next-check", state.scheduled_test_next_check);
844   write_dev_state_line(f, "selective-test-last-start", state.selective_test_last_start);
845   write_dev_state_line(f, "selective-test-last-end", state.selective_test_last_end);
846 
847   int i;
848   for (i = 0; i < SMARTD_NMAIL; i++) {
849     if (i == MAILTYPE_TEST) // Don't suppress test mails
850       continue;
851     const mailinfo & mi = state.maillog[i];
852     if (!mi.logged)
853       continue;
854     write_dev_state_line(f, "mail", i, "count", mi.logged);
855     write_dev_state_line(f, "mail", i, "first-sent-time", mi.firstsent);
856     write_dev_state_line(f, "mail", i, "last-sent-time", mi.lastsent);
857   }
858 
859   // ATA ONLY
860   write_dev_state_line(f, "ata-error-count", state.ataerrorcount);
861 
862   for (i = 0; i < NUMBER_ATA_SMART_ATTRIBUTES; i++) {
863     const persistent_dev_state::ata_attribute & pa = state.ata_attributes[i];
864     if (!pa.id)
865       continue;
866     write_dev_state_line(f, "ata-smart-attribute", i, "id", pa.id);
867     write_dev_state_line(f, "ata-smart-attribute", i, "val", pa.val);
868     write_dev_state_line(f, "ata-smart-attribute", i, "worst", pa.worst);
869     write_dev_state_line(f, "ata-smart-attribute", i, "raw", pa.raw);
870     write_dev_state_line(f, "ata-smart-attribute", i, "resvd", pa.resvd);
871   }
872 
873   // NVMe only
874   write_dev_state_line(f, "nvme-err-log-entries", state.nvme_err_log_entries);
875 
876   return true;
877 }
878 
879 // Write to the attrlog file
write_dev_attrlog(const char * path,const dev_state & state)880 static bool write_dev_attrlog(const char * path, const dev_state & state)
881 {
882   stdio_file f(path, "a");
883   if (!f) {
884     pout("Cannot create attribute log file \"%s\"\n", path);
885     return false;
886   }
887 
888 
889   time_t now = time(0);
890   struct tm tmbuf, * tms = time_to_tm_local(&tmbuf, now);
891   fprintf(f, "%d-%02d-%02d %02d:%02d:%02d;",
892              1900+tms->tm_year, 1+tms->tm_mon, tms->tm_mday,
893              tms->tm_hour, tms->tm_min, tms->tm_sec);
894   // ATA ONLY
895   for (int i = 0; i < NUMBER_ATA_SMART_ATTRIBUTES; i++) {
896     const persistent_dev_state::ata_attribute & pa = state.ata_attributes[i];
897     if (!pa.id)
898       continue;
899     fprintf(f, "\t%d;%d;%" PRIu64 ";", pa.id, pa.val, pa.raw);
900   }
901   // SCSI ONLY
902   const struct scsiErrorCounter * ecp;
903   const char * pageNames[3] = {"read", "write", "verify"};
904   for (int k = 0; k < 3; ++k) {
905     if ( !state.scsi_error_counters[k].found ) continue;
906     ecp = &state.scsi_error_counters[k].errCounter;
907      fprintf(f, "\t%s-corr-by-ecc-fast;%" PRIu64 ";"
908        "\t%s-corr-by-ecc-delayed;%" PRIu64 ";"
909        "\t%s-corr-by-retry;%" PRIu64 ";"
910        "\t%s-total-err-corrected;%" PRIu64 ";"
911        "\t%s-corr-algorithm-invocations;%" PRIu64 ";"
912        "\t%s-gb-processed;%.3f;"
913        "\t%s-total-unc-errors;%" PRIu64 ";",
914        pageNames[k], ecp->counter[0],
915        pageNames[k], ecp->counter[1],
916        pageNames[k], ecp->counter[2],
917        pageNames[k], ecp->counter[3],
918        pageNames[k], ecp->counter[4],
919        pageNames[k], (ecp->counter[5] / 1000000000.0),
920        pageNames[k], ecp->counter[6]);
921   }
922   if(state.scsi_nonmedium_error.found && state.scsi_nonmedium_error.nme.gotPC0) {
923     fprintf(f, "\tnon-medium-errors;%" PRIu64 ";", state.scsi_nonmedium_error.nme.counterPC0);
924   }
925   // write SCSI current temperature if it is monitored
926   if (state.temperature)
927     fprintf(f, "\ttemperature;%d;", state.temperature);
928   // end of line
929   fprintf(f, "\n");
930   return true;
931 }
932 
933 // Write all state files. If write_always is false, don't write
934 // unless must_write is set.
write_all_dev_states(const dev_config_vector & configs,dev_state_vector & states,bool write_always=true)935 static void write_all_dev_states(const dev_config_vector & configs,
936                                  dev_state_vector & states,
937                                  bool write_always = true)
938 {
939   for (unsigned i = 0; i < states.size(); i++) {
940     const dev_config & cfg = configs.at(i);
941     if (cfg.state_file.empty())
942       continue;
943     dev_state & state = states[i];
944     if (!write_always && !state.must_write)
945       continue;
946     if (!write_dev_state(cfg.state_file.c_str(), state))
947       continue;
948     state.must_write = false;
949     if (write_always || debugmode)
950       PrintOut(LOG_INFO, "Device: %s, state written to %s\n",
951                cfg.name.c_str(), cfg.state_file.c_str());
952   }
953 }
954 
955 // Write to all attrlog files
write_all_dev_attrlogs(const dev_config_vector & configs,dev_state_vector & states)956 static void write_all_dev_attrlogs(const dev_config_vector & configs,
957                                    dev_state_vector & states)
958 {
959   for (unsigned i = 0; i < states.size(); i++) {
960     const dev_config & cfg = configs.at(i);
961     if (cfg.attrlog_file.empty())
962       continue;
963     dev_state & state = states[i];
964     if (state.attrlog_dirty) {
965       write_dev_attrlog(cfg.attrlog_file.c_str(), state);
966       state.attrlog_dirty = false;
967     }
968   }
969 }
970 
971 extern "C" { // signal handlers require C-linkage
972 
973 //  Note if we catch a SIGUSR1
USR1handler(int sig)974 static void USR1handler(int sig)
975 {
976   if (SIGUSR1==sig)
977     caughtsigUSR1=1;
978   return;
979 }
980 
981 #ifdef _WIN32
982 //  Note if we catch a SIGUSR2
USR2handler(int sig)983 static void USR2handler(int sig)
984 {
985   if (SIGUSR2==sig)
986     caughtsigUSR2=1;
987   return;
988 }
989 #endif
990 
991 // Note if we catch a HUP (or INT in debug mode)
HUPhandler(int sig)992 static void HUPhandler(int sig)
993 {
994   if (sig==SIGHUP)
995     caughtsigHUP=1;
996   else
997     caughtsigHUP=2;
998   return;
999 }
1000 
1001 // signal handler for TERM, QUIT, and INT (if not in debug mode)
sighandler(int sig)1002 static void sighandler(int sig)
1003 {
1004   if (!caughtsigEXIT)
1005     caughtsigEXIT=sig;
1006   return;
1007 }
1008 
1009 } // extern "C"
1010 
1011 #ifdef HAVE_LIBCAP_NG
1012 // capabilities(7) support
1013 
1014 static bool capabilities_enabled = false;
1015 
capabilities_drop_now()1016 static void capabilities_drop_now()
1017 {
1018   if (!capabilities_enabled)
1019     return;
1020   capng_clear(CAPNG_SELECT_BOTH);
1021   capng_updatev(CAPNG_ADD, (capng_type_t)(CAPNG_EFFECTIVE|CAPNG_PERMITTED),
1022     CAP_SYS_ADMIN, CAP_MKNOD, CAP_SYS_RAWIO, -1);
1023   capng_apply(CAPNG_SELECT_BOTH);
1024 }
1025 
capabilities_check_config(dev_config_vector & configs)1026 static void capabilities_check_config(dev_config_vector & configs)
1027 {
1028   if (!capabilities_enabled)
1029     return;
1030   for (unsigned i = 0; i < configs.size(); i++) {
1031     dev_config & cfg = configs[i];
1032     if (!cfg.emailaddress.empty() || !cfg.emailcmdline.empty()) {
1033       PrintOut(LOG_INFO, "Device: %s, --capabilites is set, mail will be suppressed.\n",
1034                cfg.name.c_str());
1035       cfg.emailaddress.clear(); cfg.emailcmdline.clear();
1036     }
1037   }
1038 }
1039 
1040 #else // HAVE_LIBCAP_NG
1041 // No capabilities(7) support
1042 
// Build without libcap-ng: there is nothing to drop
static inline void capabilities_drop_now()
{
}
capabilities_check_config(dev_config_vector &)1044 static inline void capabilities_check_config(dev_config_vector &) { }
1045 
1046 #endif // HAVE_LIBCAP_NG
1047 
// a replacement for setenv() which is not available on all platforms.
// Note that the string passed to putenv must not be freed or made
// invalid, since a pointer to it is kept by putenv(). This means that
// it must either be a static buffer or allocated off the heap. The
// string can be freed if the environment variable is redefined via
// another call to putenv(). There is no portable way to unset a variable
// with putenv(). So we manage the buffer in a static object.
// Using setenv() if available is not considered because some
// implementations may produce memory leaks.
//
// Each env_buffer object owns the "NAME=VALUE" string of one environment
// variable. There is intentionally no destructor: the string of the last
// set() call remains referenced by the environment.

class env_buffer
{
public:
  env_buffer()
    : m_buf((char *)0) { }

  // Set environment variable NAME to VALUE.
  // Throws std::runtime_error if putenv() fails.
  void set(const char * name, const char * value);

private:
  char * m_buf; // string currently referenced by the environment, or 0

  // Not copyable: two objects must never own the same buffer
  env_buffer(const env_buffer &);
  void operator=(const env_buffer &);
};

void env_buffer::set(const char * name, const char * value)
{
  // "NAME" + '=' + "VALUE" + '\0'
  const size_t size = strlen(name) + 1 + strlen(value) + 1;
  char * newbuf = new char[size];
  snprintf(newbuf, size, "%s=%s", name, value);

  if (putenv(newbuf)) {
    // The environment does not refer to the new string, so it must be
    // released here to avoid a leak
    delete [] newbuf;
    throw std::runtime_error("putenv() failed");
  }

  // The environment now refers to newbuf, so the previous string is unused.
  // This assumes that the same NAME is passed on each call
  delete [] m_buf;
  m_buf = newbuf;
}
1086 
1087 #define EBUFLEN 1024
1088 
1089 static void MailWarning(const dev_config & cfg, dev_state & state, int which, const char *fmt, ...)
1090                         __attribute_format_printf(4, 5);
1091 
1092 // If either address or executable path is non-null then send and log
1093 // a warning email, or execute executable
MailWarning(const dev_config & cfg,dev_state & state,int which,const char * fmt,...)1094 static void MailWarning(const dev_config & cfg, dev_state & state, int which, const char *fmt, ...)
1095 {
1096   static const char * const whichfail[] = {
1097     "EmailTest",                  // 0
1098     "Health",                     // 1
1099     "Usage",                      // 2
1100     "SelfTest",                   // 3
1101     "ErrorCount",                 // 4
1102     "FailedHealthCheck",          // 5
1103     "FailedReadSmartData",        // 6
1104     "FailedReadSmartErrorLog",    // 7
1105     "FailedReadSmartSelfTestLog", // 8
1106     "FailedOpenDevice",           // 9
1107     "CurrentPendingSector",       // 10
1108     "OfflineUncorrectableSector", // 11
1109     "Temperature"                 // 12
1110   };
1111 
1112   // See if user wants us to send mail
1113   if (cfg.emailaddress.empty() && cfg.emailcmdline.empty())
1114     return;
1115 
1116   std::string address = cfg.emailaddress;
1117   const char * executable = cfg.emailcmdline.c_str();
1118 
1119   // which type of mail are we sending?
1120   mailinfo * mail=(state.maillog)+which;
1121 
1122   // checks for sanity
1123   if (cfg.emailfreq<1 || cfg.emailfreq>3) {
1124     PrintOut(LOG_CRIT,"internal error in MailWarning(): cfg.mailwarn->emailfreq=%d\n",cfg.emailfreq);
1125     return;
1126   }
1127   if (which<0 || which>=SMARTD_NMAIL || sizeof(whichfail)!=SMARTD_NMAIL*sizeof(char *)) {
1128     PrintOut(LOG_CRIT,"Contact " PACKAGE_BUGREPORT "; internal error in MailWarning(): which=%d, size=%d\n",
1129              which, (int)sizeof(whichfail));
1130     return;
1131   }
1132 
1133   // Return if a single warning mail has been sent.
1134   if ((cfg.emailfreq==1) && mail->logged)
1135     return;
1136 
1137   // Return if this is an email test and one has already been sent.
1138   if (which == 0 && mail->logged)
1139     return;
1140 
1141   // To decide if to send mail, we need to know what time it is.
1142   time_t epoch = time(0);
1143 
1144   // Return if less than one day has gone by
1145   const int day = 24*3600;
1146   if (cfg.emailfreq==2 && mail->logged && epoch<(mail->lastsent+day))
1147     return;
1148 
1149   // Return if less than 2^(logged-1) days have gone by
1150   if (cfg.emailfreq==3 && mail->logged) {
1151     int days = 0x01 << (mail->logged - 1);
1152     days*=day;
1153     if  (epoch<(mail->lastsent+days))
1154       return;
1155   }
1156 
1157   // record the time of this mail message, and the first mail message
1158   if (!mail->logged)
1159     mail->firstsent=epoch;
1160   mail->lastsent=epoch;
1161 
1162   // print warning string into message
1163   // Note: Message length may reach ~300 characters as device names may be
1164   // very long on certain platforms (macOS ~230 characters).
1165   // Message length must not exceed email line length limit, see RFC 5322:
1166   // "... MUST be no more than 998 characters, ... excluding the CRLF."
1167   char message[512];
1168   va_list ap;
1169   va_start(ap, fmt);
1170   vsnprintf(message, sizeof(message), fmt, ap);
1171   va_end(ap);
1172 
1173   // replace commas by spaces to separate recipients
1174   std::replace(address.begin(), address.end(), ',', ' ');
1175 
1176   // Export information in environment variables that will be useful
1177   // for user scripts
1178   static env_buffer env[12];
1179   env[0].set("SMARTD_MAILER", executable);
1180   env[1].set("SMARTD_MESSAGE", message);
1181   char dates[DATEANDEPOCHLEN];
1182   snprintf(dates, sizeof(dates), "%d", mail->logged);
1183   env[2].set("SMARTD_PREVCNT", dates);
1184   dateandtimezoneepoch(dates, mail->firstsent);
1185   env[3].set("SMARTD_TFIRST", dates);
1186   snprintf(dates, DATEANDEPOCHLEN,"%d", (int)mail->firstsent);
1187   env[4].set("SMARTD_TFIRSTEPOCH", dates);
1188   env[5].set("SMARTD_FAILTYPE", whichfail[which]);
1189   env[6].set("SMARTD_ADDRESS", address.c_str());
1190   env[7].set("SMARTD_DEVICESTRING", cfg.name.c_str());
1191 
1192   // Allow 'smartctl ... -d $SMARTD_DEVICETYPE $SMARTD_DEVICE'
1193   env[8].set("SMARTD_DEVICETYPE",
1194              (!cfg.dev_type.empty() ? cfg.dev_type.c_str() : "auto"));
1195   env[9].set("SMARTD_DEVICE", cfg.dev_name.c_str());
1196 
1197   env[10].set("SMARTD_DEVICEINFO", cfg.dev_idinfo.c_str());
1198   dates[0] = 0;
1199   if (which) switch (cfg.emailfreq) {
1200     case 2: dates[0] = '1'; dates[1] = 0; break;
1201     case 3: snprintf(dates, sizeof(dates), "%d", (0x01)<<mail->logged);
1202   }
1203   env[11].set("SMARTD_NEXTDAYS", dates);
1204 
1205   // now construct a command to send this as EMAIL
1206   if (!*executable)
1207     executable = "<mail>";
1208   const char * newadd = (!address.empty()? address.c_str() : "<nomailer>");
1209   const char * newwarn = (which? "Warning via" : "Test of");
1210 
1211   char command[256];
1212 #ifdef _WIN32
1213   // Path may contain spaces
1214   snprintf(command, sizeof(command), "\"%s\" 2>&1", warning_script.c_str());
1215 #else
1216   snprintf(command, sizeof(command), "%s 2>&1", warning_script.c_str());
1217 #endif
1218 
1219   // tell SYSLOG what we are about to do...
1220   PrintOut(LOG_INFO,"%s %s to %s ...\n",
1221            which?"Sending warning via":"Executing test of", executable, newadd);
1222 
1223   // issue the command to send mail or to run the user's executable
1224   errno=0;
1225   FILE * pfp;
1226   if (!(pfp=popen(command, "r")))
1227     // failed to popen() mail process
1228     PrintOut(LOG_CRIT,"%s %s to %s: failed (fork or pipe failed, or no memory) %s\n",
1229              newwarn,  executable, newadd, errno?strerror(errno):"");
1230   else {
1231     // pipe succeeded!
1232     int len, status;
1233     char buffer[EBUFLEN];
1234 
1235     // if unexpected output on stdout/stderr, null terminate, print, and flush
1236     if ((len=fread(buffer, 1, EBUFLEN, pfp))) {
1237       int count=0;
1238       int newlen = len<EBUFLEN ? len : EBUFLEN-1;
1239       buffer[newlen]='\0';
1240       PrintOut(LOG_CRIT,"%s %s to %s produced unexpected output (%s%d bytes) to STDOUT/STDERR: \n%s\n",
1241                newwarn, executable, newadd, len!=newlen?"here truncated to ":"", newlen, buffer);
1242 
1243       // flush pipe if needed
1244       while (fread(buffer, 1, EBUFLEN, pfp) && count<EBUFLEN)
1245         count++;
1246 
1247       // tell user that pipe was flushed, or that something is really wrong
1248       if (count && count<EBUFLEN)
1249         PrintOut(LOG_CRIT,"%s %s to %s: flushed remaining STDOUT/STDERR\n",
1250                  newwarn, executable, newadd);
1251       else if (count)
1252         PrintOut(LOG_CRIT,"%s %s to %s: more than 1 MB STDOUT/STDERR flushed, breaking pipe\n",
1253                  newwarn, executable, newadd);
1254     }
1255 
1256     // if something went wrong with mail process, print warning
1257     errno=0;
1258     if (-1==(status=pclose(pfp)))
1259       PrintOut(LOG_CRIT,"%s %s to %s: pclose(3) failed %s\n", newwarn, executable, newadd,
1260                errno?strerror(errno):"");
1261     else {
1262       // mail process apparently succeeded. Check and report exit status
1263       if (WIFEXITED(status)) {
1264         // exited 'normally' (but perhaps with nonzero status)
1265         int status8 = WEXITSTATUS(status);
1266         if (status8>128)
1267           PrintOut(LOG_CRIT,"%s %s to %s: failed (32-bit/8-bit exit status: %d/%d) perhaps caught signal %d [%s]\n",
1268                    newwarn, executable, newadd, status, status8, status8-128, strsignal(status8-128));
1269         else if (status8)
1270           PrintOut(LOG_CRIT,"%s %s to %s: failed (32-bit/8-bit exit status: %d/%d)\n",
1271                    newwarn, executable, newadd, status, status8);
1272         else
1273           PrintOut(LOG_INFO,"%s %s to %s: successful\n", newwarn, executable, newadd);
1274       }
1275 
1276       if (WIFSIGNALED(status))
1277         PrintOut(LOG_INFO,"%s %s to %s: exited because of uncaught signal %d [%s]\n",
1278                  newwarn, executable, newadd, WTERMSIG(status), strsignal(WTERMSIG(status)));
1279 
1280       // this branch is probably not possible. If subprocess is
1281       // stopped then pclose() should not return.
1282       if (WIFSTOPPED(status))
1283         PrintOut(LOG_CRIT,"%s %s to %s: process STOPPED because it caught signal %d [%s]\n",
1284                  newwarn, executable, newadd, WSTOPSIG(status), strsignal(WSTOPSIG(status)));
1285 
1286     }
1287   }
1288 
1289   // increment mail sent counter
1290   mail->logged++;
1291 }
1292 
1293 static void reset_warning_mail(const dev_config & cfg, dev_state & state, int which, const char *fmt, ...)
1294                                __attribute_format_printf(4, 5);
1295 
reset_warning_mail(const dev_config & cfg,dev_state & state,int which,const char * fmt,...)1296 static void reset_warning_mail(const dev_config & cfg, dev_state & state, int which, const char *fmt, ...)
1297 {
1298   if (!(0 <= which && which < SMARTD_NMAIL))
1299     return;
1300 
1301   // Return if no mail sent yet
1302   mailinfo & mi = state.maillog[which];
1303   if (!mi.logged)
1304     return;
1305 
1306   // Format & print message
1307   char msg[256];
1308   va_list ap;
1309   va_start(ap, fmt);
1310   vsnprintf(msg, sizeof(msg), fmt, ap);
1311   va_end(ap);
1312 
1313   PrintOut(LOG_INFO, "Device: %s, %s, warning condition reset after %d email%s\n", cfg.name.c_str(),
1314            msg, mi.logged, (mi.logged==1 ? "" : "s"));
1315 
1316   // Clear mail counter and timestamps
1317   mi = mailinfo();
1318   state.must_write = true;
1319 }
1320 
1321 #ifndef _WIN32
1322 
1323 // Output multiple lines via separate syslog(3) calls.
1324 __attribute_format_printf(2, 0)
vsyslog_lines(int priority,const char * fmt,va_list ap)1325 static void vsyslog_lines(int priority, const char * fmt, va_list ap)
1326 {
1327   char buf[512+EBUFLEN]; // enough space for exec cmd output in MailWarning()
1328   vsnprintf(buf, sizeof(buf), fmt, ap);
1329 
1330   for (char * p = buf, * q; p && *p; p = q) {
1331     if ((q = strchr(p, '\n')))
1332       *q++ = 0;
1333     if (*p)
1334       syslog(priority, "%s\n", p);
1335   }
1336 }
1337 
1338 #else  // _WIN32
1339 // os_win32/syslog_win32.cpp supports multiple lines.
1340 #define vsyslog_lines vsyslog
1341 #endif // _WIN32
1342 
1343 // Printing function for watching ataprint commands, or losing them
1344 // [From GLIBC Manual: Since the prototype doesn't specify types for
1345 // optional arguments, in a call to a variadic function the default
1346 // argument promotions are performed on the optional argument
1347 // values. This means the objects of type char or short int (whether
1348 // signed or not) are promoted to either int or unsigned int, as
1349 // appropriate.]
pout(const char * fmt,...)1350 void pout(const char *fmt, ...){
1351   va_list ap;
1352 
1353   // get the correct time in syslog()
1354   FixGlibcTimeZoneBug();
1355   // initialize variable argument list
1356   va_start(ap,fmt);
1357   // in debugmode==1 mode we will print the output from the ataprint.o functions!
1358   if (debugmode && debugmode != 2) {
1359     FILE * f = stdout;
1360 #ifdef _WIN32
1361     if (facility == LOG_LOCAL1) // logging to stdout
1362       f = stderr;
1363 #endif
1364     vfprintf(f, fmt, ap);
1365     fflush(f);
1366   }
1367   // in debugmode==2 mode we print output from knowndrives.o functions
1368   else if (debugmode==2 || ata_debugmode || scsi_debugmode) {
1369     openlog("smartd", LOG_PID, facility);
1370     vsyslog_lines(LOG_INFO, fmt, ap);
1371     closelog();
1372   }
1373   va_end(ap);
1374   return;
1375 }
1376 
1377 // This function prints either to stdout or to the syslog as needed.
PrintOut(int priority,const char * fmt,...)1378 static void PrintOut(int priority, const char *fmt, ...){
1379   va_list ap;
1380 
1381   // get the correct time in syslog()
1382   FixGlibcTimeZoneBug();
1383   // initialize variable argument list
1384   va_start(ap,fmt);
1385   if (debugmode) {
1386     FILE * f = stdout;
1387 #ifdef _WIN32
1388     if (facility == LOG_LOCAL1) // logging to stdout
1389       f = stderr;
1390 #endif
1391     vfprintf(f, fmt, ap);
1392     fflush(f);
1393   }
1394   else {
1395     openlog("smartd", LOG_PID, facility);
1396     vsyslog_lines(priority, fmt, ap);
1397     closelog();
1398   }
1399   va_end(ap);
1400   return;
1401 }
1402 
1403 // Used to warn users about invalid checksums. Called from atacmds.cpp.
checksumwarning(const char * string)1404 void checksumwarning(const char * string)
1405 {
1406   pout("Warning! %s error: invalid SMART checksum.\n", string);
1407 }
1408 
1409 #ifndef _WIN32
1410 
1411 // Wait for the pid file to show up, this makes sure a calling program knows
1412 // that the daemon is really up and running and has a pid to kill it
WaitForPidFile()1413 static bool WaitForPidFile()
1414 {
1415   int waited, max_wait = 10;
1416   struct stat stat_buf;
1417 
1418   if (pid_file.empty() || debugmode)
1419     return true;
1420 
1421   for(waited = 0; waited < max_wait; ++waited) {
1422     if (!stat(pid_file.c_str(), &stat_buf)) {
1423       return true;
1424     } else
1425       sleep(1);
1426   }
1427   return false;
1428 }
1429 
1430 #endif // _WIN32
1431 
// Forks new process if needed, closes ALL file descriptors,
// redirects stdin, stdout, and stderr.  Not quite daemon().
// See https://www.linuxjournal.com/article/2335
// for a good description of why we do things this way.
//
// Return value:
//   -1            no error, this process continues as the daemon
//   0             this process is a parent of one of the fork()s and
//                 has nothing more to do
//   EXIT_STARTUP  fork, redirection, chdir or console detach failed,
//                 or the PID file did not show up
static int daemon_init()
{
#ifndef _WIN32

  // flush all buffered streams.  Else we might get two copies of open
  // streams since both parent and child get copies of the buffers.
  fflush(NULL);

  if (do_fork) {
    pid_t pid;
    if ((pid=fork()) < 0) {
      // unable to fork!
      PrintOut(LOG_CRIT,"smartd unable to fork daemon process!\n");
      return EXIT_STARTUP;
    }
    if (pid) {
      // we are the parent process, wait for pid file, then exit cleanly
      if(!WaitForPidFile()) {
        PrintOut(LOG_CRIT,"PID file %s didn't show up!\n", pid_file.c_str());
        return EXIT_STARTUP;
      }
      return 0;
    }

    // from here on, we are the child process.
    setsid();

    // Fork one more time to avoid any possibility of having terminals
    if ((pid=fork()) < 0) {
      // unable to fork!
      PrintOut(LOG_CRIT,"smartd unable to fork daemon process!\n");
      return EXIT_STARTUP;
    }
    if (pid)
      // we are the parent process -- exit cleanly
      return 0;

    // Now we are the child's child...
  }

  // close any open file descriptors
  // (this is done with and without fork)
  for (int i = getdtablesize(); --i >= 0; )
    close(i);

  // redirect any IO attempts to /dev/null and change to root directory
  // (the checks require that open() and the two dup() calls return the
  // descriptors 0, 1 and 2 in this order)
  int fd = open("/dev/null", O_RDWR);
  if (!(fd == 0 && dup(fd) == 1 && dup(fd) == 2 && !chdir("/"))) {
    PrintOut(LOG_CRIT, "smartd unable to redirect to /dev/null or to chdir to root!\n");
    return EXIT_STARTUP;
  }
  umask(0022);

  if (do_fork)
    PrintOut(LOG_INFO, "smartd has fork()ed into background mode. New PID=%d.\n", (int)getpid());

#else // _WIN32

  // No fork() on native Win32
  // Detach this process from console
  fflush(NULL);
  if (daemon_detach("smartd")) {
    PrintOut(LOG_CRIT,"smartd unable to detach from console!\n");
    return EXIT_STARTUP;
  }
  // stdin/out/err now closed if not redirected

#endif // _WIN32

  // No error, continue in main_worker()
  return -1;
}
1507 
1508 // create a PID file containing the current process id
write_pid_file()1509 static bool write_pid_file()
1510 {
1511   if (!pid_file.empty()) {
1512     pid_t pid = getpid();
1513     mode_t old_umask;
1514 #ifndef __CYGWIN__
1515     old_umask = umask(0077); // rwx------
1516 #else
1517     // Cygwin: smartd service runs on system account, ensure PID file can be read by admins
1518     old_umask = umask(0033); // rwxr--r--
1519 #endif
1520 
1521     stdio_file f(pid_file.c_str(), "w");
1522     umask(old_umask);
1523     if (!(f && fprintf(f, "%d\n", (int)pid) > 0 && f.close())) {
1524       PrintOut(LOG_CRIT, "unable to write PID file %s - exiting.\n", pid_file.c_str());
1525       return false;
1526     }
1527     PrintOut(LOG_INFO, "file %s written containing PID %d\n", pid_file.c_str(), (int)pid);
1528   }
1529   return true;
1530 }
1531 
1532 // Prints header identifying version of code and home
PrintHead()1533 static void PrintHead()
1534 {
1535   PrintOut(LOG_INFO, "%s\n", format_version_info("smartd").c_str());
1536 }
1537 
1538 // prints help info for configuration file Directives
Directives()1539 static void Directives()
1540 {
1541   PrintOut(LOG_INFO,
1542            "Configuration file (%s) Directives (after device name):\n"
1543            "  -d TYPE Set the device type: auto, ignore, removable,\n"
1544            "          %s\n"
1545            "  -T TYPE Set the tolerance to one of: normal, permissive\n"
1546            "  -o VAL  Enable/disable automatic offline tests (on/off)\n"
1547            "  -S VAL  Enable/disable attribute autosave (on/off)\n"
1548            "  -n MODE No check if: never, sleep[,N][,q], standby[,N][,q], idle[,N][,q]\n"
1549            "  -H      Monitor SMART Health Status, report if failed\n"
1550            "  -s REG  Do Self-Test at time(s) given by regular expression REG\n"
1551            "  -l TYPE Monitor SMART log or self-test status:\n"
1552            "          error, selftest, xerror, offlinests[,ns], selfteststs[,ns]\n"
1553            "  -l scterc,R,W  Set SCT Error Recovery Control\n"
1554            "  -e      Change device setting: aam,[N|off], apm,[N|off], dsn,[on|off],\n"
1555            "          lookahead,[on|off], security-freeze, standby,[N|off], wcache,[on|off]\n"
1556            "  -f      Monitor 'Usage' Attributes, report failures\n"
1557            "  -m ADD  Send email warning to address ADD\n"
1558            "  -M TYPE Modify email warning behavior (see man page)\n"
1559            "  -p      Report changes in 'Prefailure' Attributes\n"
1560            "  -u      Report changes in 'Usage' Attributes\n"
1561            "  -t      Equivalent to -p and -u Directives\n"
1562            "  -r ID   Also report Raw values of Attribute ID with -p, -u or -t\n"
1563            "  -R ID   Track changes in Attribute ID Raw value with -p, -u or -t\n"
1564            "  -i ID   Ignore Attribute ID for -f Directive\n"
1565            "  -I ID   Ignore Attribute ID for -p, -u or -t Directive\n"
1566            "  -C ID[+] Monitor [increases of] Current Pending Sectors in Attribute ID\n"
1567            "  -U ID[+] Monitor [increases of] Offline Uncorrectable Sectors in Attribute ID\n"
1568            "  -W D,I,C Monitor Temperature D)ifference, I)nformal limit, C)ritical limit\n"
1569            "  -v N,ST Modifies labeling of Attribute N (see man page)  \n"
1570            "  -P TYPE Drive-specific presets: use, ignore, show, showall\n"
1571            "  -a      Default: -H -f -t -l error -l selftest -l selfteststs -C 197 -U 198\n"
1572            "  -F TYPE Use firmware bug workaround:\n"
1573            "          %s\n"
1574            "   #      Comment: text after a hash sign is ignored\n"
1575            "   \\      Line continuation character\n"
1576            "Attribute ID is a decimal integer 1 <= ID <= 255\n"
1577            "Use ID = 0 to turn off -C and/or -U Directives\n"
1578            "Example: /dev/sda -a\n",
1579            configfile,
1580            smi()->get_valid_dev_types_str().c_str(),
1581            get_valid_firmwarebug_args());
1582 }
1583 
/* Returns a pointer to a static string containing a formatted list of the valid
   arguments to the option opt or NULL if there is no list for opt. */
static const char *GetValidArgList(char opt)
{
  // Option letters which share the same argument list are grouped
  static const struct {
    const char * opts;
    const char * args;
  } arg_lists[] = {
    { "As", "<PATH_PREFIX>" },
    { "B",  "[+]<FILE_NAME>" },
    { "c",  "<FILE_NAME>, -" },
    { "l",  "daemon, local0, local1, local2, local3, local4, local5, local6, local7" },
    { "q",  "nodev, errors, nodevstartup, never, onecheck, showtests" },
    { "r",  "ioctl[,N], ataioctl[,N], scsiioctl[,N], nvmeioctl[,N]" },
    { "pw", "<FILE_NAME>" },
    { "i",  "<INTEGER_SECONDS>" }
  };

  // strchr() would also find the terminating null character
  if (!opt)
    return NULL;

  for (unsigned k = 0; k < sizeof(arg_lists)/sizeof(arg_lists[0]); k++) {
    if (strchr(arg_lists[k].opts, opt))
      return arg_lists[k].args;
  }
  return NULL;
}
1611 
1612 /* prints help information for command syntax */
Usage()1613 static void Usage()
1614 {
1615   PrintOut(LOG_INFO,"Usage: smartd [options]\n\n");
1616   PrintOut(LOG_INFO,"  -A PREFIX, --attributelog=PREFIX\n");
1617   PrintOut(LOG_INFO,"        Log ATA attribute information to {PREFIX}MODEL-SERIAL.ata.csv\n");
1618 #ifdef SMARTMONTOOLS_ATTRIBUTELOG
1619   PrintOut(LOG_INFO,"        [default is " SMARTMONTOOLS_ATTRIBUTELOG "MODEL-SERIAL.ata.csv]\n");
1620 #endif
1621   PrintOut(LOG_INFO,"\n");
1622   PrintOut(LOG_INFO,"  -B [+]FILE, --drivedb=[+]FILE\n");
1623   PrintOut(LOG_INFO,"        Read and replace [add] drive database from FILE\n");
1624   PrintOut(LOG_INFO,"        [default is +%s", get_drivedb_path_add());
1625 #ifdef SMARTMONTOOLS_DRIVEDBDIR
1626   PrintOut(LOG_INFO,"\n");
1627   PrintOut(LOG_INFO,"         and then    %s", get_drivedb_path_default());
1628 #endif
1629   PrintOut(LOG_INFO,"]\n\n");
1630   PrintOut(LOG_INFO,"  -c NAME|-, --configfile=NAME|-\n");
1631   PrintOut(LOG_INFO,"        Read configuration file NAME or stdin\n");
1632   PrintOut(LOG_INFO,"        [default is %s]\n\n", configfile);
1633 #ifdef HAVE_LIBCAP_NG
1634   PrintOut(LOG_INFO,"  -C, --capabilities\n");
1635   PrintOut(LOG_INFO,"        Drop unneeded Linux process capabilities.\n"
1636                     "        Warning: Mail notification does not work when used.\n\n");
1637 #endif
1638   PrintOut(LOG_INFO,"  -d, --debug\n");
1639   PrintOut(LOG_INFO,"        Start smartd in debug mode\n\n");
1640   PrintOut(LOG_INFO,"  -D, --showdirectives\n");
1641   PrintOut(LOG_INFO,"        Print the configuration file Directives and exit\n\n");
1642   PrintOut(LOG_INFO,"  -h, --help, --usage\n");
1643   PrintOut(LOG_INFO,"        Display this help and exit\n\n");
1644   PrintOut(LOG_INFO,"  -i N, --interval=N\n");
1645   PrintOut(LOG_INFO,"        Set interval between disk checks to N seconds, where N >= 10\n\n");
1646   PrintOut(LOG_INFO,"  -l local[0-7], --logfacility=local[0-7]\n");
1647 #ifndef _WIN32
1648   PrintOut(LOG_INFO,"        Use syslog facility local0 - local7 or daemon [default]\n\n");
1649 #else
1650   PrintOut(LOG_INFO,"        Log to \"./smartd.log\", stdout, stderr [default is event log]\n\n");
1651 #endif
1652 #ifndef _WIN32
1653   PrintOut(LOG_INFO,"  -n, --no-fork\n");
1654   PrintOut(LOG_INFO,"        Do not fork into background\n");
1655 #ifdef HAVE_LIBSYSTEMD
1656   PrintOut(LOG_INFO,"        (systemd 'Type=notify' is assumed if $NOTIFY_SOCKET is set)\n");
1657 #endif // HAVE_LIBSYSTEMD
1658   PrintOut(LOG_INFO,"\n");
1659 #endif // WIN32
1660   PrintOut(LOG_INFO,"  -p NAME, --pidfile=NAME\n");
1661   PrintOut(LOG_INFO,"        Write PID file NAME\n\n");
1662   PrintOut(LOG_INFO,"  -q WHEN, --quit=WHEN\n");
1663   PrintOut(LOG_INFO,"        Quit on one of: %s\n\n", GetValidArgList('q'));
1664   PrintOut(LOG_INFO,"  -r, --report=TYPE\n");
1665   PrintOut(LOG_INFO,"        Report transactions for one of: %s\n\n", GetValidArgList('r'));
1666   PrintOut(LOG_INFO,"  -s PREFIX, --savestates=PREFIX\n");
1667   PrintOut(LOG_INFO,"        Save disk states to {PREFIX}MODEL-SERIAL.TYPE.state\n");
1668 #ifdef SMARTMONTOOLS_SAVESTATES
1669   PrintOut(LOG_INFO,"        [default is " SMARTMONTOOLS_SAVESTATES "MODEL-SERIAL.TYPE.state]\n");
1670 #endif
1671   PrintOut(LOG_INFO,"\n");
1672   PrintOut(LOG_INFO,"  -w NAME, --warnexec=NAME\n");
1673   PrintOut(LOG_INFO,"        Run executable NAME on warnings\n");
1674 #ifndef _WIN32
1675   PrintOut(LOG_INFO,"        [default is " SMARTMONTOOLS_SMARTDSCRIPTDIR "/smartd_warning.sh]\n\n");
1676 #else
1677   PrintOut(LOG_INFO,"        [default is %s/smartd_warning.cmd]\n\n", get_exe_dir().c_str());
1678 #endif
1679 #ifdef _WIN32
1680   PrintOut(LOG_INFO,"  --service\n");
1681   PrintOut(LOG_INFO,"        Running as windows service (see man page), install with:\n");
1682   PrintOut(LOG_INFO,"          smartd install [options]\n");
1683   PrintOut(LOG_INFO,"        Remove service with:\n");
1684   PrintOut(LOG_INFO,"          smartd remove\n\n");
1685 #endif // _WIN32
1686   PrintOut(LOG_INFO,"  -V, --version, --license, --copyright\n");
1687   PrintOut(LOG_INFO,"        Print License, Copyright, and version information\n");
1688 }
1689 
CloseDevice(smart_device * device,const char * name)1690 static int CloseDevice(smart_device * device, const char * name)
1691 {
1692   if (!device->close()){
1693     PrintOut(LOG_INFO,"Device: %s, %s, close() failed\n", name, device->get_errmsg());
1694     return 1;
1695   }
1696   // device successfully closed
1697   return 0;
1698 }
1699 
// Predicate used when building state file names:
// returns false for the ASCII digits and letters, true for any other char.
static bool not_allowed_in_filename(char c)
{
  if ('0' <= c && c <= '9')
    return false;
  if ('A' <= c && c <= 'Z')
    return false;
  if ('a' <= c && c <= 'z')
    return false;
  return true;
}
1707 
1708 // Read error count from Summary or Extended Comprehensive SMART error log
1709 // Return -1 on error
read_ata_error_count(ata_device * device,const char * name,firmwarebug_defs firmwarebugs,bool extended)1710 static int read_ata_error_count(ata_device * device, const char * name,
1711                                 firmwarebug_defs firmwarebugs, bool extended)
1712 {
1713   if (!extended) {
1714     ata_smart_errorlog log;
1715     if (ataReadErrorLog(device, &log, firmwarebugs)){
1716       PrintOut(LOG_INFO,"Device: %s, Read Summary SMART Error Log failed\n",name);
1717       return -1;
1718     }
1719     return (log.error_log_pointer ? log.ata_error_count : 0);
1720   }
1721   else {
1722     ata_smart_exterrlog logx;
1723     if (!ataReadExtErrorLog(device, &logx, 0, 1 /*first sector only*/, firmwarebugs)) {
1724       PrintOut(LOG_INFO,"Device: %s, Read Extended Comprehensive SMART Error Log failed\n",name);
1725       return -1;
1726     }
1727     // Some disks use the reserved byte as index, see ataprint.cpp.
1728     return (logx.error_log_index || logx.reserved1 ? logx.device_error_count : 0);
1729   }
1730 }
1731 
1732 // returns <0 if problem.  Otherwise, bottom 8 bits are the self test
1733 // error count, and top bits are the power-on hours of the last error.
SelfTestErrorCount(ata_device * device,const char * name,firmwarebug_defs firmwarebugs)1734 static int SelfTestErrorCount(ata_device * device, const char * name,
1735                               firmwarebug_defs firmwarebugs)
1736 {
1737   struct ata_smart_selftestlog log;
1738 
1739   if (ataReadSelfTestLog(device, &log, firmwarebugs)){
1740     PrintOut(LOG_INFO,"Device: %s, Read SMART Self Test Log Failed\n",name);
1741     return -1;
1742   }
1743 
1744   if (!log.mostrecenttest)
1745     // No tests logged
1746     return 0;
1747 
1748   // Count failed self-tests
1749   int errcnt = 0, hours = 0;
1750   for (int i = 20; i >= 0; i--) {
1751     int j = (i + log.mostrecenttest) % 21;
1752     const ata_smart_selftestlog_struct & entry = log.selftest_struct[j];
1753     if (!nonempty(&entry, sizeof(entry)))
1754       continue;
1755 
1756     int status = entry.selfteststatus >> 4;
1757     if (status == 0x0 && (entry.selftestnumber & 0x7f) == 0x02)
1758       // First successful extended self-test, stop count
1759       break;
1760 
1761     if (0x3 <= status && status <= 0x8) {
1762       // Self-test showed an error
1763       errcnt++;
1764       // Keep track of time of most recent error
1765       if (!hours)
1766         hours = entry.timestamp;
1767     }
1768   }
1769 
1770   return ((hours << 8) | errcnt);
1771 }
1772 
// Unpack the value returned by SelfTestErrorCount():
// bits 0-7 are the error count, bits 8-23 the hours of the last error.
// The argument is parenthesized so that compound expressions such as
// 'a | b' are evaluated as a whole before masking/shifting.
#define SELFTEST_ERRORCOUNT(x) ((x) & 0xff)
#define SELFTEST_ERRORHOURS(x) (((x) >> 8) & 0xffff)
1775 
// Offline data collection status check:
// true if STATUS with bit 7 masked off equals 0x03.
static inline bool is_offl_coll_in_progress(unsigned char status)
{
  const int code = status & 0x7f;
  return (code == 0x03);
}
1781 
// Self-test execution status check:
// true if the upper four bits of STATUS are all set.
static inline bool is_self_test_in_progress(unsigned char status)
{
  return ((status & 0xf0) == 0xf0);
}
1787 
1788 // Log offline data collection status
log_offline_data_coll_status(const char * name,unsigned char status)1789 static void log_offline_data_coll_status(const char * name, unsigned char status)
1790 {
1791   const char * msg;
1792   switch (status & 0x7f) {
1793     case 0x00: msg = "was never started"; break;
1794     case 0x02: msg = "was completed without error"; break;
1795     case 0x03: msg = "is in progress"; break;
1796     case 0x04: msg = "was suspended by an interrupting command from host"; break;
1797     case 0x05: msg = "was aborted by an interrupting command from host"; break;
1798     case 0x06: msg = "was aborted by the device with a fatal error"; break;
1799     default:   msg = 0;
1800   }
1801 
1802   if (msg)
1803     PrintOut(((status & 0x7f) == 0x06 ? LOG_CRIT : LOG_INFO),
1804              "Device: %s, offline data collection %s%s\n", name, msg,
1805              ((status & 0x80) ? " (auto:on)" : ""));
1806   else
1807     PrintOut(LOG_INFO, "Device: %s, unknown offline data collection status 0x%02x\n",
1808              name, status);
1809 }
1810 
1811 // Log self-test execution status
log_self_test_exec_status(const char * name,unsigned char status)1812 static void log_self_test_exec_status(const char * name, unsigned char status)
1813 {
1814   const char * msg;
1815   switch (status >> 4) {
1816     case 0x0: msg = "completed without error"; break;
1817     case 0x1: msg = "was aborted by the host"; break;
1818     case 0x2: msg = "was interrupted by the host with a reset"; break;
1819     case 0x3: msg = "could not complete due to a fatal or unknown error"; break;
1820     case 0x4: msg = "completed with error (unknown test element)"; break;
1821     case 0x5: msg = "completed with error (electrical test element)"; break;
1822     case 0x6: msg = "completed with error (servo/seek test element)"; break;
1823     case 0x7: msg = "completed with error (read test element)"; break;
1824     case 0x8: msg = "completed with error (handling damage?)"; break;
1825     default:  msg = 0;
1826   }
1827 
1828   if (msg)
1829     PrintOut(((status >> 4) >= 0x4 ? LOG_CRIT : LOG_INFO),
1830              "Device: %s, previous self-test %s\n", name, msg);
1831   else if ((status >> 4) == 0xf)
1832     PrintOut(LOG_INFO, "Device: %s, self-test in progress, %u0%% remaining\n",
1833              name, status & 0x0f);
1834   else
1835     PrintOut(LOG_INFO, "Device: %s, unknown self-test status 0x%02x\n",
1836              name, status);
1837 }
1838 
1839 // Check pending sector count id (-C, -U directives).
check_pending_id(const dev_config & cfg,const dev_state & state,unsigned char id,const char * msg)1840 static bool check_pending_id(const dev_config & cfg, const dev_state & state,
1841                              unsigned char id, const char * msg)
1842 {
1843   // Check attribute index
1844   int i = ata_find_attr_index(id, state.smartval);
1845   if (i < 0) {
1846     PrintOut(LOG_INFO, "Device: %s, can't monitor %s count - no Attribute %d\n",
1847              cfg.name.c_str(), msg, id);
1848     return false;
1849   }
1850 
1851   // Check value
1852   uint64_t rawval = ata_get_attr_raw_value(state.smartval.vendor_attributes[i],
1853     cfg.attribute_defs);
1854   if (rawval >= (state.num_sectors ? state.num_sectors : 0xffffffffULL)) {
1855     PrintOut(LOG_INFO, "Device: %s, ignoring %s count - bogus Attribute %d value %" PRIu64 " (0x%" PRIx64 ")\n",
1856              cfg.name.c_str(), msg, id, rawval, rawval);
1857     return false;
1858   }
1859 
1860   return true;
1861 }
1862 
1863 // Called by ATA/SCSI/NVMeDeviceScan() after successful device check
finish_device_scan(dev_config & cfg,dev_state & state)1864 static void finish_device_scan(dev_config & cfg, dev_state & state)
1865 {
1866   // Set cfg.emailfreq if user hasn't set it
1867   if ((!cfg.emailaddress.empty() || !cfg.emailcmdline.empty()) && !cfg.emailfreq) {
1868     // Avoid that emails are suppressed forever due to state persistence
1869     if (cfg.state_file.empty())
1870       cfg.emailfreq = 1; // '-M once'
1871     else
1872       cfg.emailfreq = 2; // '-M daily'
1873   }
1874 
1875   // Start self-test regex check now if time was not read from state file
1876   if (!cfg.test_regex.empty() && !state.scheduled_test_next_check)
1877     state.scheduled_test_next_check = time(0);
1878 }
1879 
1880 // Common function to format result message for ATA setting
format_set_result_msg(std::string & msg,const char * name,bool ok,int set_option=0,bool has_value=false)1881 static void format_set_result_msg(std::string & msg, const char * name, bool ok,
1882                                   int set_option = 0, bool has_value = false)
1883 {
1884   if (!msg.empty())
1885     msg += ", ";
1886   msg += name;
1887   if (!ok)
1888     msg += ":--";
1889   else if (set_option < 0)
1890     msg += ":off";
1891   else if (has_value)
1892     msg += strprintf(":%d", set_option-1);
1893   else if (set_option > 0)
1894     msg += ":on";
1895 }
1896 
1897 // Return true and print message if CFG.dev_idinfo is already in PREV_CFGS
is_duplicate_dev_idinfo(const dev_config & cfg,const dev_config_vector & prev_cfgs)1898 static bool is_duplicate_dev_idinfo(const dev_config & cfg, const dev_config_vector & prev_cfgs)
1899 {
1900   if (!cfg.id_is_unique)
1901     return false;
1902 
1903   for (unsigned i = 0; i < prev_cfgs.size(); i++) {
1904     if (!prev_cfgs[i].id_is_unique)
1905       continue;
1906     if (cfg.dev_idinfo != prev_cfgs[i].dev_idinfo)
1907       continue;
1908 
1909     PrintOut(LOG_INFO, "Device: %s, same identity as %s, ignored\n",
1910              cfg.dev_name.c_str(), prev_cfgs[i].dev_name.c_str());
1911     return true;
1912   }
1913 
1914   return false;
1915 }
1916 
// Flag passed as last argument to ata_read_identity() by ATADeviceScan().
// Currently a compile-time constant (always false) because there is no
// configuration directive to change it.
// NOTE(review): presumably requests correction of byte-swapped identify
// strings - confirm against ata_read_identity().
// TODO: Add '-F swapid' directive
const bool fix_swapped_id = false;
1919 
// scan to see what ata devices there are, and if they support SMART
//
// Probes the already opened ATA device ATADEV and prepares CFG and STATE
// for monitoring:
// - reads the IDENTIFY DEVICE data and builds cfg.dev_idinfo,
// - applies drive database presets (unless cfg.ignorepresets is set),
// - enables SMART and applies the autosave / auto offline test requests,
// - checks each requested monitoring feature and clears the related cfg
//   flag if the device cannot provide it,
// - applies the requested ATA settings and SCT Error Recovery Control,
// - sets the names of the state and attribute log files and reads a
//   previously saved state.
// PREV_CFGS may be null, the duplicate identity check is then skipped.
// The device is closed before each return.
// Return value:
//   0: device is ready to be monitored
//   1: device has the same identity as an entry of PREV_CFGS
//   2: no IDENTIFY data, packet device, or SMART not available/not enabled
//   3: no test available or selected
static int ATADeviceScan(dev_config & cfg, dev_state & state, ata_device * atadev,
                         const dev_config_vector * prev_cfgs)
{
  int supported=0;
  struct ata_identify_device drive;
  const char *name = cfg.name.c_str();
  int retid;

  // Device must be open

  // Get drive identity structure
  if ((retid = ata_read_identity(atadev, &drive, fix_swapped_id))) {
    if (retid<0)
      // Unable to read Identity structure
      PrintOut(LOG_INFO,"Device: %s, not ATA, no IDENTIFY DEVICE Structure\n",name);
    else
      // Positive return value: packet device, (retid-1) selects the type name
      PrintOut(LOG_INFO,"Device: %s, packet devices [this device %s] not SMART capable\n",
               name, packetdevicetype(retid-1));
    CloseDevice(atadev, name);
    return 2;
  }

  // Get drive identity, size and rotation rate (HDD/SSD)
  char model[40+1], serial[20+1], firmware[8+1];
  ata_format_id_string(model, drive.model, sizeof(model)-1);
  ata_format_id_string(serial, drive.serial_no, sizeof(serial)-1);
  ata_format_id_string(firmware, drive.fw_rev, sizeof(firmware)-1);

  ata_size_info sizes;
  ata_get_size_info(&drive, sizes);
  state.num_sectors = sizes.sectors;
  cfg.dev_rpm = ata_get_rotation_rate(&drive);

  // WWN string remains empty if ata_get_wwn() returns a negative value
  char wwn[64]; wwn[0] = 0;
  unsigned oui = 0; uint64_t unique_id = 0;
  int naa = ata_get_wwn(&drive, oui, unique_id);
  if (naa >= 0)
    snprintf(wwn, sizeof(wwn), "WWN:%x-%06x-%09" PRIx64 ", ", naa, oui, unique_id);

  // Format device id string for warning emails
  char cap[32];
  cfg.dev_idinfo = strprintf("%s, S/N:%s, %sFW:%s, %s", model, serial, wwn, firmware,
                     format_capacity(cap, sizeof(cap), sizes.capacity, "."));
  cfg.id_is_unique = true; // TODO: Check serial?

  PrintOut(LOG_INFO, "Device: %s, %s\n", name, cfg.dev_idinfo.c_str());

  // Check for duplicates
  if (prev_cfgs && is_duplicate_dev_idinfo(cfg, *prev_cfgs)) {
    CloseDevice(atadev, name);
    return 1;
  }

  // Show if device in database, and use preset vendor attribute
  // options unless user has requested otherwise.
  if (cfg.ignorepresets)
    PrintOut(LOG_INFO, "Device: %s, smartd database not searched (Directive: -P ignore).\n", name);
  else {
    // Apply vendor specific presets, print warning if present
    const drive_settings * dbentry = lookup_drive_apply_presets(
      &drive, cfg.attribute_defs, cfg.firmwarebugs);
    if (!dbentry)
      PrintOut(LOG_INFO, "Device: %s, not found in smartd database.\n", name);
    else {
      PrintOut(LOG_INFO, "Device: %s, found in smartd database%s%s\n",
        name, (*dbentry->modelfamily ? ": " : "."), (*dbentry->modelfamily ? dbentry->modelfamily : ""));
      if (*dbentry->warningmsg)
        PrintOut(LOG_CRIT, "Device: %s, WARNING: %s\n", name, dbentry->warningmsg);
    }
  }

  // Check for ATA Security LOCK
  // ('locked' is used below to skip the SCT Error Recovery Control setting)
  unsigned short word128 = drive.words088_255[128-88];
  bool locked = ((word128 & 0x0007) == 0x0007); // LOCKED|ENABLED|SUPPORTED
  if (locked)
    PrintOut(LOG_INFO, "Device: %s, ATA Security is **LOCKED**\n", name);

  // Set default '-C 197[+]' if no '-C ID' is specified.
  if (!cfg.curr_pending_set)
    cfg.curr_pending_id = get_unc_attr_id(false, cfg.attribute_defs, cfg.curr_pending_incr);
  // Set default '-U 198[+]' if no '-U ID' is specified.
  if (!cfg.offl_pending_set)
    cfg.offl_pending_id = get_unc_attr_id(true, cfg.attribute_defs, cfg.offl_pending_incr);

  // If requested, show which presets would be used for this drive
  if (cfg.showpresets) {
    // debugmode is temporarily set to 2 (if it was 0) while the presets
    // are shown and restored afterwards
    int savedebugmode=debugmode;
    PrintOut(LOG_INFO, "Device %s: presets are:\n", name);
    if (!debugmode)
      debugmode=2;
    show_presets(&drive);
    debugmode=savedebugmode;
  }

  // see if drive supports SMART
  supported=ataSmartSupport(&drive);
  if (supported!=1) {
    if (supported==0)
      // drive does NOT support SMART
      PrintOut(LOG_INFO,"Device: %s, lacks SMART capability\n",name);
    else
      // can't tell if drive supports SMART
      PrintOut(LOG_INFO,"Device: %s, ATA IDENTIFY DEVICE words 82-83 don't specify if SMART capable.\n",name);

    // should we proceed anyway?
    if (cfg.permissive) {
      PrintOut(LOG_INFO,"Device: %s, proceeding since '-T permissive' Directive given.\n",name);
    }
    else {
      PrintOut(LOG_INFO,"Device: %s, to proceed anyway, use '-T permissive' Directive.\n",name);
      CloseDevice(atadev, name);
      return 2;
    }
  }

  if (ataEnableSmart(atadev)) {
    // Enable SMART command has failed
    PrintOut(LOG_INFO,"Device: %s, could not enable SMART capability\n",name);

    // Give up unless the identify data reports SMART as already enabled
    // or '-T permissive' is set
    if (ataIsSmartEnabled(&drive) <= 0) {
      if (!cfg.permissive) {
        PrintOut(LOG_INFO, "Device: %s, to proceed anyway, use '-T permissive' Directive.\n", name);
        CloseDevice(atadev, name);
        return 2;
      }
      PrintOut(LOG_INFO, "Device: %s, proceeding since '-T permissive' Directive given.\n", name);
    }
    else {
      PrintOut(LOG_INFO, "Device: %s, proceeding since SMART is already enabled\n", name);
    }
  }

  // disable device attribute autosave...
  if (cfg.autosave==1) {
    if (ataDisableAutoSave(atadev))
      PrintOut(LOG_INFO,"Device: %s, could not disable SMART Attribute Autosave.\n",name);
    else
      PrintOut(LOG_INFO,"Device: %s, disabled SMART Attribute Autosave.\n",name);
  }

  // or enable device attribute autosave
  if (cfg.autosave==2) {
    if (ataEnableAutoSave(atadev))
      PrintOut(LOG_INFO,"Device: %s, could not enable SMART Attribute Autosave.\n",name);
    else
      PrintOut(LOG_INFO,"Device: %s, enabled SMART Attribute Autosave.\n",name);
  }

  // capability check: SMART status
  if (cfg.smartcheck && ataSmartStatus2(atadev) == -1) {
    PrintOut(LOG_INFO,"Device: %s, not capable of SMART Health Status check\n",name);
    cfg.smartcheck = false;
  }

  // capability check: Read smart values and thresholds.  Note that
  // smart values are ALSO needed even if we ONLY want to know if the
  // device is self-test log or error-log capable!  After ATA-5, this
  // information was ALSO reproduced in the IDENTIFY DEVICE response,
  // but sadly not for ATA-5.  Sigh.

  // do we need to get SMART data?
  bool smart_val_ok = false;
  if (   cfg.autoofflinetest || cfg.selftest
      || cfg.errorlog        || cfg.xerrorlog
      || cfg.offlinests      || cfg.selfteststs
      || cfg.usagefailed     || cfg.prefail  || cfg.usage
      || cfg.tempdiff        || cfg.tempinfo || cfg.tempcrit
      || cfg.curr_pending_id || cfg.offl_pending_id         ) {

    if (ataReadSmartValues(atadev, &state.smartval)) {
      // Without SMART values, all attribute based checks are turned off
      PrintOut(LOG_INFO, "Device: %s, Read SMART Values failed\n", name);
      cfg.usagefailed = cfg.prefail = cfg.usage = false;
      cfg.tempdiff = cfg.tempinfo = cfg.tempcrit = 0;
      cfg.curr_pending_id = cfg.offl_pending_id = 0;
    }
    else {
      smart_val_ok = true;
      if (ataReadSmartThresholds(atadev, &state.smartthres)) {
        PrintOut(LOG_INFO, "Device: %s, Read SMART Thresholds failed%s\n",
                 name, (cfg.usagefailed ? ", ignoring -f Directive" : ""));
        cfg.usagefailed = false;
        // Let ata_get_attr_state() return ATTRSTATE_NO_THRESHOLD:
        memset(&state.smartthres, 0, sizeof(state.smartthres));
      }
    }

    // see if the necessary Attribute is there to monitor offline or
    // current pending sectors or temperature
    if (   cfg.curr_pending_id
        && !check_pending_id(cfg, state, cfg.curr_pending_id,
              "Current_Pending_Sector"))
      cfg.curr_pending_id = 0;

    if (   cfg.offl_pending_id
        && !check_pending_id(cfg, state, cfg.offl_pending_id,
              "Offline_Uncorrectable"))
      cfg.offl_pending_id = 0;

    if (   (cfg.tempdiff || cfg.tempinfo || cfg.tempcrit)
        && !ata_return_temperature_value(&state.smartval, cfg.attribute_defs)) {
      PrintOut(LOG_INFO, "Device: %s, can't monitor Temperature, ignoring -W %d,%d,%d\n",
               name, cfg.tempdiff, cfg.tempinfo, cfg.tempcrit);
      cfg.tempdiff = cfg.tempinfo = cfg.tempcrit = 0;
    }

    // Report ignored '-r' or '-R' directives
    // (only messages are printed here, cfg.monitor_attr_flags is not changed)
    for (int id = 1; id <= 255; id++) {
      if (cfg.monitor_attr_flags.is_set(id, MONITOR_RAW_PRINT)) {
        // 'R' if MONITOR_RAW is also set, else 'r'; "!" if the related
        // '*_AS_CRIT' flag is set
        char opt = (!cfg.monitor_attr_flags.is_set(id, MONITOR_RAW) ? 'r' : 'R');
        const char * excl = (cfg.monitor_attr_flags.is_set(id,
          (opt == 'r' ? MONITOR_AS_CRIT : MONITOR_RAW_AS_CRIT)) ? "!" : "");

        int idx = ata_find_attr_index(id, state.smartval);
        if (idx < 0)
          PrintOut(LOG_INFO,"Device: %s, no Attribute %d, ignoring -%c %d%s\n", name, id, opt, id, excl);
        else {
          bool prefail = !!ATTRIBUTE_FLAGS_PREFAILURE(state.smartval.vendor_attributes[idx].flags);
          if (!((prefail && cfg.prefail) || (!prefail && cfg.usage)))
            PrintOut(LOG_INFO,"Device: %s, not monitoring %s Attributes, ignoring -%c %d%s\n", name,
                     (prefail ? "Prefailure" : "Usage"), opt, id, excl);
        }
      }
    }
  }

  // enable/disable automatic on-line testing
  if (cfg.autoofflinetest) {
    // is this an enable or disable request?
    const char *what=(cfg.autoofflinetest==1)?"disable":"enable";
    if (!smart_val_ok)
      PrintOut(LOG_INFO,"Device: %s, could not %s SMART Automatic Offline Testing.\n",name, what);
    else {
      // if command appears unsupported, issue a warning...
      if (!isSupportAutomaticTimer(&state.smartval))
        PrintOut(LOG_INFO,"Device: %s, SMART Automatic Offline Testing unsupported...\n",name);
      // ... but then try anyway
      if ((cfg.autoofflinetest==1)?ataDisableAutoOffline(atadev):ataEnableAutoOffline(atadev))
        PrintOut(LOG_INFO,"Device: %s, %s SMART Automatic Offline Testing failed.\n", name, what);
      else
        PrintOut(LOG_INFO,"Device: %s, %sd SMART Automatic Offline Testing.\n", name, what);
    }
  }

  // Read log directories if required for capability check
  // NOTE(review): the entry[0xNN-1] accesses below suggest that the directory
  // index is (log address - 1) - confirm against ata_smart_log_directory.
  ata_smart_log_directory smart_logdir, gp_logdir;
  bool smart_logdir_ok = false, gp_logdir_ok = false;

  if (   isGeneralPurposeLoggingCapable(&drive)
      && (cfg.errorlog || cfg.selftest)
      && !cfg.firmwarebugs.is_set(BUG_NOLOGDIR)) {
      if (!ataReadLogDirectory(atadev, &smart_logdir, false))
        smart_logdir_ok = true;
  }

  if (cfg.xerrorlog && !cfg.firmwarebugs.is_set(BUG_NOLOGDIR)) {
    if (!ataReadLogDirectory(atadev, &gp_logdir, true))
      gp_logdir_ok = true;
  }

  // capability check: self-test-log
  // The log is tried if '-T permissive' is set, or the SMART log directory
  // lists it, or (without a directory) the SMART values report the capability.
  state.selflogcount = 0; state.selfloghour = 0;
  if (cfg.selftest) {
    int retval;
    if (!(   cfg.permissive
          || ( smart_logdir_ok && smart_logdir.entry[0x06-1].numsectors)
          || (!smart_logdir_ok && smart_val_ok && isSmartTestLogCapable(&state.smartval, &drive)))) {
      PrintOut(LOG_INFO, "Device: %s, no SMART Self-test Log, ignoring -l selftest (override with -T permissive)\n", name);
      cfg.selftest = false;
    }
    else if ((retval = SelfTestErrorCount(atadev, name, cfg.firmwarebugs)) < 0) {
      PrintOut(LOG_INFO, "Device: %s, no SMART Self-test Log, ignoring -l selftest\n", name);
      cfg.selftest = false;
    }
    else {
      // Remember the current counts as initial state
      state.selflogcount=SELFTEST_ERRORCOUNT(retval);
      state.selfloghour =SELFTEST_ERRORHOURS(retval);
    }
  }

  // capability check: ATA error log
  state.ataerrorcount = 0;
  if (cfg.errorlog) {
    int errcnt1;
    if (!(   cfg.permissive
          || ( smart_logdir_ok && smart_logdir.entry[0x01-1].numsectors)
          || (!smart_logdir_ok && smart_val_ok && isSmartErrorLogCapable(&state.smartval, &drive)))) {
      PrintOut(LOG_INFO, "Device: %s, no SMART Error Log, ignoring -l error (override with -T permissive)\n", name);
      cfg.errorlog = false;
    }
    else if ((errcnt1 = read_ata_error_count(atadev, name, cfg.firmwarebugs, false)) < 0) {
      PrintOut(LOG_INFO, "Device: %s, no SMART Error Log, ignoring -l error\n", name);
      cfg.errorlog = false;
    }
    else
      state.ataerrorcount = errcnt1;
  }

  // capability check: Extended Comprehensive SMART error log
  if (cfg.xerrorlog) {
    int errcnt2;
    if (!(   cfg.permissive || cfg.firmwarebugs.is_set(BUG_NOLOGDIR)
          || (gp_logdir_ok && gp_logdir.entry[0x03-1].numsectors)   )) {
      PrintOut(LOG_INFO, "Device: %s, no Extended Comprehensive SMART Error Log, ignoring -l xerror (override with -T permissive)\n",
               name);
      cfg.xerrorlog = false;
    }
    else if ((errcnt2 = read_ata_error_count(atadev, name, cfg.firmwarebugs, true)) < 0) {
      PrintOut(LOG_INFO, "Device: %s, no Extended Comprehensive SMART Error Log, ignoring -l xerror\n", name);
      cfg.xerrorlog = false;
    }
    else if (cfg.errorlog && state.ataerrorcount != errcnt2) {
      // Both logs are monitored but disagree
      PrintOut(LOG_INFO, "Device: %s, SMART Error Logs report different error counts: %d != %d\n",
               name, state.ataerrorcount, errcnt2);
      // Record max error count
      if (errcnt2 > state.ataerrorcount)
        state.ataerrorcount = errcnt2;
    }
    else
      state.ataerrorcount = errcnt2;
  }

  // capability check: self-test and offline data collection status
  if (cfg.offlinests || cfg.selfteststs) {
    if (!(cfg.permissive || (smart_val_ok && state.smartval.offline_data_collection_capability))) {
      if (cfg.offlinests)
        PrintOut(LOG_INFO, "Device: %s, no SMART Offline Data Collection capability, ignoring -l offlinests (override with -T permissive)\n", name);
      if (cfg.selfteststs)
        PrintOut(LOG_INFO, "Device: %s, no SMART Self-test capability, ignoring -l selfteststs (override with -T permissive)\n", name);
      cfg.offlinests = cfg.selfteststs = false;
    }
  }

  // capabilities check -- does it support powermode?
  if (cfg.powermode) {
    int powermode = ataCheckPowerMode(atadev);

    if (-1 == powermode) {
      PrintOut(LOG_CRIT, "Device: %s, no ATA CHECK POWER STATUS support, ignoring -n Directive\n", name);
      cfg.powermode=0;
    }
    // Any value not in the list below is treated as not ATA compliant
    else if (powermode!=0x00 && powermode!=0x01
        && powermode!=0x40 && powermode!=0x41
        && powermode!=0x80 && powermode!=0x81 && powermode!=0x82 && powermode!=0x83
        && powermode!=0xff) {
      PrintOut(LOG_CRIT, "Device: %s, CHECK POWER STATUS returned %d, not ATA compliant, ignoring -n Directive\n",
               name, powermode);
      cfg.powermode=0;
    }
  }

  // Apply ATA settings
  // For each cfg.set_* option: 0 = leave unchanged, negative = disable,
  // positive = enable (AAM, APM and Standby pass the value minus one).
  // The result of each command is appended to 'msg'.
  std::string msg;

  if (cfg.set_aam)
    format_set_result_msg(msg, "AAM", (cfg.set_aam > 0 ?
      ata_set_features(atadev, ATA_ENABLE_AAM, cfg.set_aam-1) :
      ata_set_features(atadev, ATA_DISABLE_AAM)), cfg.set_aam, true);

  if (cfg.set_apm)
    format_set_result_msg(msg, "APM", (cfg.set_apm > 0 ?
      ata_set_features(atadev, ATA_ENABLE_APM, cfg.set_apm-1) :
      ata_set_features(atadev, ATA_DISABLE_APM)), cfg.set_apm, true);

  if (cfg.set_lookahead)
    format_set_result_msg(msg, "Rd-ahead", ata_set_features(atadev,
      (cfg.set_lookahead > 0 ? ATA_ENABLE_READ_LOOK_AHEAD : ATA_DISABLE_READ_LOOK_AHEAD)),
      cfg.set_lookahead);

  if (cfg.set_wcache)
    format_set_result_msg(msg, "Wr-cache", ata_set_features(atadev,
      (cfg.set_wcache > 0? ATA_ENABLE_WRITE_CACHE : ATA_DISABLE_WRITE_CACHE)), cfg.set_wcache);

  if (cfg.set_dsn)
    format_set_result_msg(msg, "DSN", ata_set_features(atadev,
      ATA_ENABLE_DISABLE_DSN, (cfg.set_dsn > 0 ? 0x1 : 0x2)));

  if (cfg.set_security_freeze)
    format_set_result_msg(msg, "Security freeze",
      ata_nodata_command(atadev, ATA_SECURITY_FREEZE_LOCK));

  if (cfg.set_standby)
    format_set_result_msg(msg, "Standby",
      ata_nodata_command(atadev, ATA_IDLE, cfg.set_standby-1), cfg.set_standby, true);

  // Report as one log entry
  if (!msg.empty())
    PrintOut(LOG_INFO, "Device: %s, ATA settings applied: %s\n", name, msg.c_str());

  // set SCT Error Recovery Control if requested
  if (cfg.sct_erc_set) {
    if (!isSCTErrorRecoveryControlCapable(&drive))
      PrintOut(LOG_INFO, "Device: %s, no SCT Error Recovery Control support, ignoring -l scterc\n",
               name);
    else if (locked)
      PrintOut(LOG_INFO, "Device: %s, no SCT support if ATA Security is LOCKED, ignoring -l scterc\n",
               name);
    // Selector 1 is used for the read time, 2 for the write time;
    // the write time is not set if setting the read time failed
    else if (   ataSetSCTErrorRecoveryControltime(atadev, 1, cfg.sct_erc_readtime )
             || ataSetSCTErrorRecoveryControltime(atadev, 2, cfg.sct_erc_writetime))
      PrintOut(LOG_INFO, "Device: %s, set of SCT Error Recovery Control failed\n", name);
    else
      PrintOut(LOG_INFO, "Device: %s, SCT Error Recovery Control set to: Read: %u, Write: %u\n",
               name, cfg.sct_erc_readtime, cfg.sct_erc_writetime);
  }

  // If no tests available or selected, return
  if (!(   cfg.smartcheck  || cfg.selftest
        || cfg.errorlog    || cfg.xerrorlog
        || cfg.offlinests  || cfg.selfteststs
        || cfg.usagefailed || cfg.prefail  || cfg.usage
        || cfg.tempdiff    || cfg.tempinfo || cfg.tempcrit)) {
    CloseDevice(atadev, name);
    return 3;
  }

  // tell user we are registering device
  PrintOut(LOG_INFO,"Device: %s, is SMART capable. Adding to \"monitor\" list.\n",name);

  // close file descriptor
  CloseDevice(atadev, name);

  if (!state_path_prefix.empty() || !attrlog_path_prefix.empty()) {
    // Build file name for state file
    // (model and serial are modified in place: each char other than
    // a digit or ASCII letter is replaced by '_')
    std::replace_if(model, model+strlen(model), not_allowed_in_filename, '_');
    std::replace_if(serial, serial+strlen(serial), not_allowed_in_filename, '_');
    if (!state_path_prefix.empty()) {
      cfg.state_file = strprintf("%s%s-%s.ata.state", state_path_prefix.c_str(), model, serial);
      // Read previous state
      if (read_dev_state(cfg.state_file.c_str(), state)) {
        PrintOut(LOG_INFO, "Device: %s, state read from %s\n", name, cfg.state_file.c_str());
        // Copy ATA attribute values to temp state
        state.update_temp_state();
      }
    }
    if (!attrlog_path_prefix.empty())
      cfg.attrlog_file = strprintf("%s%s-%s.ata.csv", attrlog_path_prefix.c_str(), model, serial);
  }

  // Set defaults which depend on the state file (email frequency,
  // start time of scheduled self-test checks)
  finish_device_scan(cfg, state);

  return 0;
}
2361 
2362 // on success, return 0. On failure, return >0.  Never return <0,
2363 // please.
SCSIDeviceScan(dev_config & cfg,dev_state & state,scsi_device * scsidev,const dev_config_vector * prev_cfgs)2364 static int SCSIDeviceScan(dev_config & cfg, dev_state & state, scsi_device * scsidev,
2365                           const dev_config_vector * prev_cfgs)
2366 {
2367   int err, req_len, avail_len, version, len;
2368   const char *device = cfg.name.c_str();
2369   struct scsi_iec_mode_page iec;
2370   uint8_t  tBuf[64];
2371   uint8_t  inqBuf[96];
2372   uint8_t  vpdBuf[252];
2373   char lu_id[64], serial[256], vendor[40], model[40];
2374 
2375   // Device must be open
2376   memset(inqBuf, 0, 96);
2377   req_len = 36;
2378   if ((err = scsiStdInquiry(scsidev, inqBuf, req_len))) {
2379     /* Marvell controllers fail on a 36 bytes StdInquiry, but 64 suffices */
2380     req_len = 64;
2381     if ((err = scsiStdInquiry(scsidev, inqBuf, req_len))) {
2382       PrintOut(LOG_INFO, "Device: %s, Both 36 and 64 byte INQUIRY failed; "
2383                "skip device\n", device);
2384       return 2;
2385     }
2386   }
2387   version = (inqBuf[2] & 0x7f); /* Accept old ISO/IEC 9316:1995 variants */
2388 
2389   avail_len = inqBuf[4] + 5;
2390   len = (avail_len < req_len) ? avail_len : req_len;
2391   if (len < 36) {
2392     PrintOut(LOG_INFO, "Device: %s, INQUIRY response less than 36 bytes; "
2393              "skip device\n", device);
2394     return 2;
2395   }
2396 
2397   int pdt = inqBuf[0] & 0x1f;
2398 
2399   if (! ((0 == pdt) || (4 == pdt) || (5 == pdt) || (7 == pdt) ||
2400          (0xe == pdt))) {
2401     PrintOut(LOG_INFO, "Device: %s, not a disk like device [PDT=0x%x], "
2402              "skip\n", device, pdt);
2403     return 2;
2404   }
2405 
2406   if (supported_vpd_pages_p) {
2407     delete supported_vpd_pages_p;
2408     supported_vpd_pages_p = NULL;
2409   }
2410   supported_vpd_pages_p = new supported_vpd_pages(scsidev);
2411 
2412   lu_id[0] = '\0';
2413   if ((version >= 0x3) && (version < 0x8)) {
2414     /* SPC to SPC-5 */
2415     if (0 == scsiInquiryVpd(scsidev, SCSI_VPD_DEVICE_IDENTIFICATION,
2416                             vpdBuf, sizeof(vpdBuf))) {
2417       len = vpdBuf[3];
2418       scsi_decode_lu_dev_id(vpdBuf + 4, len, lu_id, sizeof(lu_id), NULL);
2419     }
2420   }
2421   serial[0] = '\0';
2422   if (0 == scsiInquiryVpd(scsidev, SCSI_VPD_UNIT_SERIAL_NUMBER,
2423                           vpdBuf, sizeof(vpdBuf))) {
2424           len = vpdBuf[3];
2425           vpdBuf[4 + len] = '\0';
2426           scsi_format_id_string(serial, &vpdBuf[4], len);
2427   }
2428 
2429   char si_str[64];
2430   struct scsi_readcap_resp srr;
2431   uint64_t capacity = scsiGetSize(scsidev, scsidev->use_rcap16(), &srr);
2432 
2433   if (capacity)
2434     format_capacity(si_str, sizeof(si_str), capacity, ".");
2435   else
2436     si_str[0] = '\0';
2437 
2438   // Format device id string for warning emails
2439   cfg.dev_idinfo = strprintf("[%.8s %.16s %.4s]%s%s%s%s%s%s",
2440                      (char *)&inqBuf[8], (char *)&inqBuf[16], (char *)&inqBuf[32],
2441                      (lu_id[0] ? ", lu id: " : ""), (lu_id[0] ? lu_id : ""),
2442                      (serial[0] ? ", S/N: " : ""), (serial[0] ? serial : ""),
2443                      (si_str[0] ? ", " : ""), (si_str[0] ? si_str : ""));
2444   cfg.id_is_unique = (lu_id[0] || serial[0]);
2445 
2446   // format "model" string
2447   scsi_format_id_string(vendor, &inqBuf[8], 8);
2448   scsi_format_id_string(model, &inqBuf[16], 16);
2449   PrintOut(LOG_INFO, "Device: %s, %s\n", device, cfg.dev_idinfo.c_str());
2450 
2451   // Check for duplicates
2452   if (prev_cfgs && is_duplicate_dev_idinfo(cfg, *prev_cfgs)) {
2453     CloseDevice(scsidev, device);
2454     return 1;
2455   }
2456 
2457   // check that device is ready for commands. IE stores its stuff on
2458   // the media.
2459   if ((err = scsiTestUnitReady(scsidev))) {
2460     if (SIMPLE_ERR_NOT_READY == err)
2461       PrintOut(LOG_INFO, "Device: %s, NOT READY (e.g. spun down); skip device\n", device);
2462     else if (SIMPLE_ERR_NO_MEDIUM == err)
2463       PrintOut(LOG_INFO, "Device: %s, NO MEDIUM present; skip device\n", device);
2464     else if (SIMPLE_ERR_BECOMING_READY == err)
2465       PrintOut(LOG_INFO, "Device: %s, BECOMING (but not yet) READY; skip device\n", device);
2466     else
2467       PrintOut(LOG_CRIT, "Device: %s, failed Test Unit Ready [err=%d]\n", device, err);
2468     CloseDevice(scsidev, device);
2469     return 2;
2470   }
2471 
2472   // Badly-conforming USB storage devices may fail this check.
2473   // The response to the following IE mode page fetch (current and
2474   // changeable values) is carefully examined. It has been found
2475   // that various USB devices that malform the response will lock up
2476   // if asked for a log page (e.g. temperature) so it is best to
2477   // bail out now.
2478   if (!(err = scsiFetchIECmpage(scsidev, &iec, state.modese_len)))
2479     state.modese_len = iec.modese_len;
2480   else if (SIMPLE_ERR_BAD_FIELD == err)
2481     ;  /* continue since it is reasonable not to support IE mpage */
2482   else { /* any other error (including malformed response) unreasonable */
2483     PrintOut(LOG_INFO,
2484              "Device: %s, Bad IEC (SMART) mode page, err=%d, skip device\n",
2485              device, err);
2486     CloseDevice(scsidev, device);
2487     return 3;
2488   }
2489 
2490   // N.B. The following is passive (i.e. it doesn't attempt to turn on
2491   // smart if it is off). This may change to be the same as the ATA side.
2492   if (!scsi_IsExceptionControlEnabled(&iec)) {
2493     PrintOut(LOG_INFO, "Device: %s, IE (SMART) not enabled, skip device\n"
2494                        "Try 'smartctl -s on %s' to turn on SMART features\n",
2495                         device, device);
2496     CloseDevice(scsidev, device);
2497     return 3;
2498   }
2499 
2500   // Flag that certain log pages are supported (information may be
2501   // available from other sources).
2502   if (0 == scsiLogSense(scsidev, SUPPORTED_LPAGES, 0, tBuf, sizeof(tBuf), 0) ||
2503       0 == scsiLogSense(scsidev, SUPPORTED_LPAGES, 0, tBuf, sizeof(tBuf), 68))
2504       /* workaround for the bug #678 on ST8000NM0075/E001. Up to 64 pages + 4b header */
2505   {
2506     for (int k = 4; k < tBuf[3] + LOGPAGEHDRSIZE; ++k) {
2507       switch (tBuf[k]) {
2508       case TEMPERATURE_LPAGE:
2509         state.TempPageSupported = 1;
2510         break;
2511       case IE_LPAGE:
2512         state.SmartPageSupported = 1;
2513         break;
2514       case READ_ERROR_COUNTER_LPAGE:
2515         state.ReadECounterPageSupported = 1;
2516         break;
2517       case WRITE_ERROR_COUNTER_LPAGE:
2518         state.WriteECounterPageSupported = 1;
2519         break;
2520       case VERIFY_ERROR_COUNTER_LPAGE:
2521         state.VerifyECounterPageSupported = 1;
2522         break;
2523       case NON_MEDIUM_ERROR_LPAGE:
2524         state.NonMediumErrorPageSupported = 1;
2525         break;
2526       default:
2527         break;
2528       }
2529     }
2530   }
2531 
2532   // Check if scsiCheckIE() is going to work
2533   {
2534     uint8_t asc = 0;
2535     uint8_t ascq = 0;
2536     uint8_t currenttemp = 0;
2537     uint8_t triptemp = 0;
2538 
2539     if (scsiCheckIE(scsidev, state.SmartPageSupported, state.TempPageSupported,
2540                     &asc, &ascq, &currenttemp, &triptemp)) {
2541       PrintOut(LOG_INFO, "Device: %s, unexpectedly failed to read SMART values\n", device);
2542       state.SuppressReport = 1;
2543     }
2544     if (   (state.SuppressReport || !currenttemp)
2545         && (cfg.tempdiff || cfg.tempinfo || cfg.tempcrit)) {
2546       PrintOut(LOG_INFO, "Device: %s, can't monitor Temperature, ignoring -W %d,%d,%d\n",
2547                device, cfg.tempdiff, cfg.tempinfo, cfg.tempcrit);
2548       cfg.tempdiff = cfg.tempinfo = cfg.tempcrit = 0;
2549     }
2550   }
2551 
2552   // capability check: self-test-log
2553   if (cfg.selftest){
2554     int retval = scsiCountFailedSelfTests(scsidev, 0);
2555     if (retval<0) {
2556       // no self-test log, turn off monitoring
2557       PrintOut(LOG_INFO, "Device: %s, does not support SMART Self-Test Log.\n", device);
2558       cfg.selftest = false;
2559       state.selflogcount = 0;
2560       state.selfloghour = 0;
2561     }
2562     else {
2563       // register starting values to watch for changes
2564       state.selflogcount=SELFTEST_ERRORCOUNT(retval);
2565       state.selfloghour =SELFTEST_ERRORHOURS(retval);
2566     }
2567   }
2568 
2569   // disable autosave (set GLTSD bit)
2570   if (cfg.autosave==1){
2571     if (scsiSetControlGLTSD(scsidev, 1, state.modese_len))
2572       PrintOut(LOG_INFO,"Device: %s, could not disable autosave (set GLTSD bit).\n",device);
2573     else
2574       PrintOut(LOG_INFO,"Device: %s, disabled autosave (set GLTSD bit).\n",device);
2575   }
2576 
2577   // or enable autosave (clear GLTSD bit)
2578   if (cfg.autosave==2){
2579     if (scsiSetControlGLTSD(scsidev, 0, state.modese_len))
2580       PrintOut(LOG_INFO,"Device: %s, could not enable autosave (clear GLTSD bit).\n",device);
2581     else
2582       PrintOut(LOG_INFO,"Device: %s, enabled autosave (cleared GLTSD bit).\n",device);
2583   }
2584 
2585   // tell user we are registering device
2586   PrintOut(LOG_INFO, "Device: %s, is SMART capable. Adding to \"monitor\" list.\n", device);
2587 
2588   // Make sure that init_standby_check() ignores SCSI devices
2589   cfg.offlinests_ns = cfg.selfteststs_ns = false;
2590 
2591   // close file descriptor
2592   CloseDevice(scsidev, device);
2593 
2594   if (!state_path_prefix.empty() || !attrlog_path_prefix.empty()) {
2595     // Build file name for state file
2596     std::replace_if(model, model+strlen(model), not_allowed_in_filename, '_');
2597     std::replace_if(serial, serial+strlen(serial), not_allowed_in_filename, '_');
2598     if (!state_path_prefix.empty()) {
2599       cfg.state_file = strprintf("%s%s-%s-%s.scsi.state", state_path_prefix.c_str(), vendor, model, serial);
2600       // Read previous state
2601       if (read_dev_state(cfg.state_file.c_str(), state)) {
2602         PrintOut(LOG_INFO, "Device: %s, state read from %s\n", device, cfg.state_file.c_str());
2603         // Copy ATA attribute values to temp state
2604         state.update_temp_state();
2605       }
2606     }
2607     if (!attrlog_path_prefix.empty())
2608       cfg.attrlog_file = strprintf("%s%s-%s-%s.scsi.csv", attrlog_path_prefix.c_str(), vendor, model, serial);
2609   }
2610 
2611   finish_device_scan(cfg, state);
2612 
2613   return 0;
2614 }
2615 
// Convert a 128 bit little endian integer to uint64_t.
// The result saturates to the maximum uint64_t value if the number
// does not fit into 64 bits.
static uint64_t le128_to_uint64(const unsigned char (& val)[16])
{
  // Bytes 8..15 hold the upper 64 bits: any nonzero byte means overflow
  int idx = 15;
  while (idx >= 8) {
    if (val[idx] != 0)
      return ~(uint64_t)0;
    idx--;
  }

  // Assemble the lower 64 bits, starting with the most significant byte
  uint64_t result = 0;
  for ( ; idx >= 0; idx--)
    result = (result << 8) | val[idx];
  return result;
}
2629 
2630 // Get max temperature in Kelvin reported in NVMe SMART/Health log.
nvme_get_max_temp_kelvin(const nvme_smart_log & smart_log)2631 static int nvme_get_max_temp_kelvin(const nvme_smart_log & smart_log)
2632 {
2633   int k = (smart_log.temperature[1] << 8) | smart_log.temperature[0];
2634   for (int i = 0; i < 8; i++) {
2635     if (smart_log.temp_sensor[i] > k)
2636       k = smart_log.temp_sensor[i];
2637   }
2638   return k;
2639 }
2640 
NVMeDeviceScan(dev_config & cfg,dev_state & state,nvme_device * nvmedev,const dev_config_vector * prev_cfgs)2641 static int NVMeDeviceScan(dev_config & cfg, dev_state & state, nvme_device * nvmedev,
2642                           const dev_config_vector * prev_cfgs)
2643 {
2644   const char *name = cfg.name.c_str();
2645 
2646   // Device must be open
2647 
2648   // Get ID Controller
2649   nvme_id_ctrl id_ctrl;
2650   if (!nvme_read_id_ctrl(nvmedev, id_ctrl)) {
2651     PrintOut(LOG_INFO, "Device: %s, NVMe Identify Controller failed\n", name);
2652     CloseDevice(nvmedev, name);
2653     return 2;
2654   }
2655 
2656   // Get drive identity
2657   char model[40+1], serial[20+1], firmware[8+1];
2658   format_char_array(model, id_ctrl.mn);
2659   format_char_array(serial, id_ctrl.sn);
2660   format_char_array(firmware, id_ctrl.fr);
2661 
2662   // Format device id string for warning emails
2663   char nsstr[32] = "", capstr[32] = "";
2664   unsigned nsid = nvmedev->get_nsid();
2665   if (nsid != 0xffffffff)
2666     snprintf(nsstr, sizeof(nsstr), ", NSID:%u", nsid);
2667   uint64_t capacity = le128_to_uint64(id_ctrl.tnvmcap);
2668   if (capacity)
2669     format_capacity(capstr, sizeof(capstr), capacity, ".");
2670   cfg.dev_idinfo = strprintf("%s, S/N:%s, FW:%s%s%s%s", model, serial, firmware,
2671                              nsstr, (capstr[0] ? ", " : ""), capstr);
2672   cfg.id_is_unique = true; // TODO: Check serial?
2673 
2674   PrintOut(LOG_INFO, "Device: %s, %s\n", name, cfg.dev_idinfo.c_str());
2675 
2676   // Check for duplicates
2677   if (prev_cfgs && is_duplicate_dev_idinfo(cfg, *prev_cfgs)) {
2678     CloseDevice(nvmedev, name);
2679     return 1;
2680   }
2681 
2682   // Read SMART/Health log
2683   nvme_smart_log smart_log;
2684   if (!nvme_read_smart_log(nvmedev, smart_log)) {
2685     PrintOut(LOG_INFO, "Device: %s, failed to read NVMe SMART/Health Information\n", name);
2686     CloseDevice(nvmedev, name);
2687     return 2;
2688   }
2689 
2690   // Check temperature sensor support
2691   if (cfg.tempdiff || cfg.tempinfo || cfg.tempcrit) {
2692     if (!nvme_get_max_temp_kelvin(smart_log)) {
2693       PrintOut(LOG_INFO, "Device: %s, no Temperature sensors, ignoring -W %d,%d,%d\n",
2694                name, cfg.tempdiff, cfg.tempinfo, cfg.tempcrit);
2695       cfg.tempdiff = cfg.tempinfo = cfg.tempcrit = 0;
2696     }
2697   }
2698 
2699   // Init total error count
2700   if (cfg.errorlog || cfg.xerrorlog) {
2701     state.nvme_err_log_entries = le128_to_uint64(smart_log.num_err_log_entries);
2702   }
2703 
2704   // If no supported tests selected, return
2705   if (!(   cfg.smartcheck || cfg.errorlog || cfg.xerrorlog
2706         || cfg.tempdiff   || cfg.tempinfo || cfg.tempcrit )) {
2707     CloseDevice(nvmedev, name);
2708     return 3;
2709   }
2710 
2711   // Tell user we are registering device
2712   PrintOut(LOG_INFO,"Device: %s, is SMART capable. Adding to \"monitor\" list.\n", name);
2713 
2714   // Make sure that init_standby_check() ignores NVMe devices
2715   cfg.offlinests_ns = cfg.selfteststs_ns = false;
2716 
2717   CloseDevice(nvmedev, name);
2718 
2719   if (!state_path_prefix.empty()) {
2720     // Build file name for state file
2721     std::replace_if(model, model+strlen(model), not_allowed_in_filename, '_');
2722     std::replace_if(serial, serial+strlen(serial), not_allowed_in_filename, '_');
2723     nsstr[0] = 0;
2724     if (nsid != 0xffffffff)
2725       snprintf(nsstr, sizeof(nsstr), "-n%u", nsid);
2726     cfg.state_file = strprintf("%s%s-%s%s.nvme.state", state_path_prefix.c_str(), model, serial, nsstr);
2727     // Read previous state
2728     if (read_dev_state(cfg.state_file.c_str(), state))
2729       PrintOut(LOG_INFO, "Device: %s, state read from %s\n", name, cfg.state_file.c_str());
2730   }
2731 
2732   finish_device_scan(cfg, state);
2733 
2734   return 0;
2735 }
2736 
2737 // Open device for next check, return false on error
open_device(const dev_config & cfg,dev_state & state,smart_device * device,const char * type)2738 static bool open_device(const dev_config & cfg, dev_state & state, smart_device * device,
2739                         const char * type)
2740 {
2741   const char * name = cfg.name.c_str();
2742 
2743   // If user has asked, test the email warning system
2744   if (cfg.emailtest)
2745     MailWarning(cfg, state, 0, "TEST EMAIL from smartd for device: %s", name);
2746 
2747   // User may have requested (with the -n Directive) to leave the disk
2748   // alone if it is in idle or standby mode.  In this case check the
2749   // power mode first before opening the device for full access,
2750   // and exit without check if disk is reported in standby.
2751   if (device->is_ata() && cfg.powermode && !state.powermodefail && !state.removed) {
2752     // Note that 'is_powered_down()' handles opening the device itself, and
2753     // can be used before calling 'open()' (that's the whole point of 'is_powered_down()'!).
2754     if (device->is_powered_down())
2755     {
2756       // skip at most powerskipmax checks
2757       if (!cfg.powerskipmax || state.powerskipcnt<cfg.powerskipmax) {
2758         // report first only except if state has changed, avoid waking up system disk
2759         if ((!state.powerskipcnt || state.lastpowermodeskipped != -1) && !cfg.powerquiet) {
2760           PrintOut(LOG_INFO, "Device: %s, is in %s mode, suspending checks\n", name, "STANDBY (OS)");
2761           state.lastpowermodeskipped = -1;
2762         }
2763         state.powerskipcnt++;
2764         return false;
2765       }
2766     }
2767   }
2768 
2769   // if we can't open device, fail gracefully rather than hard --
2770   // perhaps the next time around we'll be able to open it
2771   if (!device->open()) {
2772     // For removable devices, print error message only once and suppress email
2773     if (!cfg.removable) {
2774       PrintOut(LOG_INFO, "Device: %s, open() of %s device failed: %s\n", name, type, device->get_errmsg());
2775       MailWarning(cfg, state, 9, "Device: %s, unable to open %s device", name, type);
2776     }
2777     else if (!state.removed) {
2778       PrintOut(LOG_INFO, "Device: %s, removed %s device: %s\n", name, type, device->get_errmsg());
2779       state.removed = true;
2780     }
2781     else if (debugmode)
2782       PrintOut(LOG_INFO, "Device: %s, %s device still removed: %s\n", name, type, device->get_errmsg());
2783     return false;
2784   }
2785 
2786   if (debugmode)
2787     PrintOut(LOG_INFO,"Device: %s, opened %s device\n", name, type);
2788 
2789   if (!cfg.removable)
2790     reset_warning_mail(cfg, state, 9, "open of %s device worked again", type);
2791   else if (state.removed) {
2792     PrintOut(LOG_INFO, "Device: %s, reconnected %s device\n", name, type);
2793     state.removed = false;
2794   }
2795 
2796   return true;
2797 }
2798 
2799 // If the self-test log has got more self-test errors (or more recent
2800 // self-test errors) recorded, then notify user.
CheckSelfTestLogs(const dev_config & cfg,dev_state & state,int newi)2801 static void CheckSelfTestLogs(const dev_config & cfg, dev_state & state, int newi)
2802 {
2803   const char * name = cfg.name.c_str();
2804 
2805   if (newi<0)
2806     // command failed
2807     MailWarning(cfg, state, 8, "Device: %s, Read SMART Self-Test Log Failed", name);
2808   else {
2809     reset_warning_mail(cfg, state, 8, "Read SMART Self-Test Log worked again");
2810 
2811     // old and new error counts
2812     int oldc=state.selflogcount;
2813     int newc=SELFTEST_ERRORCOUNT(newi);
2814 
2815     // old and new error timestamps in hours
2816     int oldh=state.selfloghour;
2817     int newh=SELFTEST_ERRORHOURS(newi);
2818 
2819     if (oldc<newc) {
2820       // increase in error count
2821       PrintOut(LOG_CRIT, "Device: %s, Self-Test Log error count increased from %d to %d\n",
2822                name, oldc, newc);
2823       MailWarning(cfg, state, 3, "Device: %s, Self-Test Log error count increased from %d to %d",
2824                    name, oldc, newc);
2825       state.must_write = true;
2826     }
2827     else if (newc > 0 && oldh != newh) {
2828       // more recent error
2829       // a 'more recent' error might actually be a smaller hour number,
2830       // if the hour number has wrapped.
2831       // There's still a bug here.  You might just happen to run a new test
2832       // exactly 32768 hours after the previous failure, and have run exactly
2833       // 20 tests between the two, in which case smartd will miss the
2834       // new failure.
2835       PrintOut(LOG_CRIT, "Device: %s, new Self-Test Log error at hour timestamp %d\n",
2836                name, newh);
2837       MailWarning(cfg, state, 3, "Device: %s, new Self-Test Log error at hour timestamp %d",
2838                    name, newh);
2839       state.must_write = true;
2840     }
2841 
2842     // Print info if error entries have disappeared
2843     // or newer successful successful extended self-test exits
2844     if (oldc > newc) {
2845       PrintOut(LOG_INFO, "Device: %s, Self-Test Log error count decreased from %d to %d\n",
2846                name, oldc, newc);
2847       if (newc == 0)
2848         reset_warning_mail(cfg, state, 3, "Self-Test Log does no longer report errors");
2849     }
2850 
2851     // Needed since self-test error count may DECREASE.  Hour might
2852     // also have changed.
2853     state.selflogcount= newc;
2854     state.selfloghour = newh;
2855   }
2856   return;
2857 }
2858 
// Characters identifying the self-test types, highest priority first.
// The terminating '\0' of the string is not counted as a test type.
static const char test_type_chars[] = "LncrSCO";
static const unsigned num_test_types =
  (sizeof(test_type_chars) / sizeof(test_type_chars[0])) - 1;
2862 
2863 // returns test type if time to do test of type testtype,
2864 // 0 if not time to do test.
next_scheduled_test(const dev_config & cfg,dev_state & state,bool scsi,time_t usetime=0)2865 static char next_scheduled_test(const dev_config & cfg, dev_state & state, bool scsi, time_t usetime = 0)
2866 {
2867   // check that self-testing has been requested
2868   if (cfg.test_regex.empty())
2869     return 0;
2870 
2871   // Exit if drive not capable of any test
2872   if ( state.not_cap_long && state.not_cap_short &&
2873       (scsi || (state.not_cap_conveyance && state.not_cap_offline)))
2874     return 0;
2875 
2876   // since we are about to call localtime(), be sure glibc is informed
2877   // of any timezone changes we make.
2878   if (!usetime)
2879     FixGlibcTimeZoneBug();
2880 
2881   // Is it time for next check?
2882   time_t now = (!usetime ? time(0) : usetime);
2883   if (now < state.scheduled_test_next_check) {
2884     if (state.scheduled_test_next_check <= now + 3600)
2885       return 0; // Next check within one hour
2886     // More than one hour, assume system clock time adjusted to the past
2887     state.scheduled_test_next_check = now;
2888   }
2889   else if (state.scheduled_test_next_check + (3600L*24*90) < now) {
2890     // Limit time check interval to 90 days
2891     state.scheduled_test_next_check = now - (3600L*24*90);
2892   }
2893 
2894   // Find ':NNN[-LLL]' in regex for possible offsets and limits
2895   const unsigned max_offsets = 1 + num_test_types;
2896   unsigned offsets[max_offsets] = {0, }, limits[max_offsets] = {0, };
2897   unsigned num_offsets = 1; // offsets/limits[0] == 0 always
2898   for (const char * p = cfg.test_regex.get_pattern(); num_offsets < max_offsets; ) {
2899     const char * q = strchr(p, ':');
2900     if (!q)
2901       break;
2902     p = q + 1;
2903     unsigned offset = 0, limit = 0; int n1 = -1, n2 = -1, n3 = -1;
2904     sscanf(p, "%u%n-%n%u%n", &offset, &n1, &n2, &limit, &n3);
2905     if (!(n1 == 3 && (n2 < 0 || (n3 == 3+1+3 && limit > 0))))
2906       continue;
2907     offsets[num_offsets] = offset; limits[num_offsets] = limit;
2908     num_offsets++;
2909     p += (n3 > 0 ? n3 : n1);
2910   }
2911 
2912   // Check interval [state.scheduled_test_next_check, now] for scheduled tests
2913   char testtype = 0;
2914   time_t testtime = 0; int testhour = 0;
2915   int maxtest = num_test_types-1;
2916 
2917   for (time_t t = state.scheduled_test_next_check; ; ) {
2918     // Check offset 0 and then all offsets for ':NNN' found above
2919     for (unsigned i = 0; i < num_offsets; i++) {
2920       unsigned offset = offsets[i], limit = limits[i];
2921       unsigned delay = cfg.test_offset_factor * offset;
2922       if (0 < limit && limit < delay)
2923         delay %= limit + 1;
2924       struct tm tmbuf, * tms = time_to_tm_local(&tmbuf, t - (delay * 3600));
2925 
2926       // tm_wday is 0 (Sunday) to 6 (Saturday).  We use 1 (Monday) to 7 (Sunday).
2927       int weekday = (tms->tm_wday ? tms->tm_wday : 7);
2928       for (int j = 0; j <= maxtest; j++) {
2929         // Skip if drive not capable of this test
2930         switch (test_type_chars[j]) {
2931           case 'L': if (state.not_cap_long)       continue; break;
2932           case 'S': if (state.not_cap_short)      continue; break;
2933           case 'C': if (scsi || state.not_cap_conveyance) continue; break;
2934           case 'O': if (scsi || state.not_cap_offline)    continue; break;
2935           case 'c': case 'n':
2936           case 'r': if (scsi || state.not_cap_selective)  continue; break;
2937           default: continue;
2938         }
2939         // Try match of "T/MM/DD/d/HH[:NNN]"
2940         char pattern[64];
2941         snprintf(pattern, sizeof(pattern), "%c/%02d/%02d/%1d/%02d",
2942           test_type_chars[j], tms->tm_mon+1, tms->tm_mday, weekday, tms->tm_hour);
2943         if (i > 0) {
2944           const unsigned len = sizeof("S/01/01/1/01") - 1;
2945           snprintf(pattern + len, sizeof(pattern) - len, ":%03u", offset);
2946           if (limit > 0)
2947             snprintf(pattern + len + 4, sizeof(pattern) - len - 4, "-%03u", limit);
2948         }
2949         if (cfg.test_regex.full_match(pattern)) {
2950           // Test found
2951           testtype = pattern[0];
2952           testtime = t; testhour = tms->tm_hour;
2953           // Limit further matches to higher priority self-tests
2954           maxtest = j-1;
2955           break;
2956         }
2957       }
2958     }
2959 
2960     // Exit if no tests left or current time reached
2961     if (maxtest < 0)
2962       break;
2963     if (t >= now)
2964       break;
2965     // Check next hour
2966     if ((t += 3600) > now)
2967       t = now;
2968    }
2969 
2970   // Do next check not before next hour.
2971   struct tm tmbuf, * tmnow = time_to_tm_local(&tmbuf, now);
2972   state.scheduled_test_next_check = now + (3600 - tmnow->tm_min*60 - tmnow->tm_sec);
2973 
2974   if (testtype) {
2975     state.must_write = true;
2976     // Tell user if an old test was found.
2977     if (!usetime && !(testhour == tmnow->tm_hour && testtime + 3600 > now)) {
2978       char datebuf[DATEANDEPOCHLEN]; dateandtimezoneepoch(datebuf, testtime);
2979       PrintOut(LOG_INFO, "Device: %s, old test of type %c not run at %s, starting now.\n",
2980         cfg.name.c_str(), testtype, datebuf);
2981     }
2982   }
2983 
2984   return testtype;
2985 }
2986 
2987 // Print a list of future tests.
PrintTestSchedule(const dev_config_vector & configs,dev_state_vector & states,const smart_device_list & devices)2988 static void PrintTestSchedule(const dev_config_vector & configs, dev_state_vector & states, const smart_device_list & devices)
2989 {
2990   unsigned numdev = configs.size();
2991   if (!numdev)
2992     return;
2993   std::vector<int> testcnts(numdev * num_test_types, 0);
2994 
2995   PrintOut(LOG_INFO, "\nNext scheduled self tests (at most 5 of each type per device):\n");
2996 
2997   // FixGlibcTimeZoneBug(); // done in PrintOut()
2998   time_t now = time(0);
2999   char datenow[DATEANDEPOCHLEN], date[DATEANDEPOCHLEN];
3000   dateandtimezoneepoch(datenow, now);
3001 
3002   long seconds;
3003   for (seconds=checktime; seconds<3600L*24*90; seconds+=checktime) {
3004     // Check for each device whether a test will be run
3005     time_t testtime = now + seconds;
3006     for (unsigned i = 0; i < numdev; i++) {
3007       const dev_config & cfg = configs.at(i);
3008       dev_state & state = states.at(i);
3009       const char * p;
3010       char testtype = next_scheduled_test(cfg, state, devices.at(i)->is_scsi(), testtime);
3011       if (testtype && (p = strchr(test_type_chars, testtype))) {
3012         unsigned t = (p - test_type_chars);
3013         // Report at most 5 tests of each type
3014         if (++testcnts[i*num_test_types + t] <= 5) {
3015           dateandtimezoneepoch(date, testtime);
3016           PrintOut(LOG_INFO, "Device: %s, will do test %d of type %c at %s\n", cfg.name.c_str(),
3017             testcnts[i*num_test_types + t], testtype, date);
3018         }
3019       }
3020     }
3021   }
3022 
3023   // Report totals
3024   dateandtimezoneepoch(date, now+seconds);
3025   PrintOut(LOG_INFO, "\nTotals [%s - %s]:\n", datenow, date);
3026   for (unsigned i = 0; i < numdev; i++) {
3027     const dev_config & cfg = configs.at(i);
3028     bool scsi = devices.at(i)->is_scsi();
3029     for (unsigned t = 0; t < num_test_types; t++) {
3030       int cnt = testcnts[i*num_test_types + t];
3031       if (cnt == 0 && !strchr((scsi ? "LS" : "LSCO"), test_type_chars[t]))
3032         continue;
3033       PrintOut(LOG_INFO, "Device: %s, will do %3d test%s of type %c\n", cfg.name.c_str(),
3034         cnt, (cnt==1?"":"s"), test_type_chars[t]);
3035     }
3036   }
3037 
3038 }
3039 
3040 // Return zero on success, nonzero on failure. Perform offline (background)
3041 // short or long (extended) self test on given scsi device.
DoSCSISelfTest(const dev_config & cfg,dev_state & state,scsi_device * device,char testtype)3042 static int DoSCSISelfTest(const dev_config & cfg, dev_state & state, scsi_device * device, char testtype)
3043 {
3044   int retval = 0;
3045   const char *testname = 0;
3046   const char *name = cfg.name.c_str();
3047   int inProgress;
3048 
3049   if (scsiSelfTestInProgress(device, &inProgress)) {
3050     PrintOut(LOG_CRIT, "Device: %s, does not support Self-Tests\n", name);
3051     state.not_cap_short = state.not_cap_long = true;
3052     return 1;
3053   }
3054 
3055   if (1 == inProgress) {
3056     PrintOut(LOG_INFO, "Device: %s, skip since Self-Test already in "
3057              "progress.\n", name);
3058     return 1;
3059   }
3060 
3061   switch (testtype) {
3062   case 'S':
3063     testname = "Short Self";
3064     retval = scsiSmartShortSelfTest(device);
3065     break;
3066   case 'L':
3067     testname = "Long Self";
3068     retval = scsiSmartExtendSelfTest(device);
3069     break;
3070   }
3071   // If we can't do the test, exit
3072   if (NULL == testname) {
3073     PrintOut(LOG_CRIT, "Device: %s, not capable of %c Self-Test\n", name,
3074              testtype);
3075     return 1;
3076   }
3077   if (retval) {
3078     if ((SIMPLE_ERR_BAD_OPCODE == retval) ||
3079         (SIMPLE_ERR_BAD_FIELD == retval)) {
3080       PrintOut(LOG_CRIT, "Device: %s, not capable of %s-Test\n", name,
3081                testname);
3082       if ('L'==testtype)
3083         state.not_cap_long = true;
3084       else
3085         state.not_cap_short = true;
3086 
3087       return 1;
3088     }
3089     PrintOut(LOG_CRIT, "Device: %s, execute %s-Test failed (err: %d)\n", name,
3090              testname, retval);
3091     return 1;
3092   }
3093 
3094   PrintOut(LOG_INFO, "Device: %s, starting scheduled %s-Test.\n", name, testname);
3095 
3096   return 0;
3097 }
3098 
3099 // Do an offline immediate or self-test.  Return zero on success,
3100 // nonzero on failure.
DoATASelfTest(const dev_config & cfg,dev_state & state,ata_device * device,char testtype)3101 static int DoATASelfTest(const dev_config & cfg, dev_state & state, ata_device * device, char testtype)
3102 {
3103   const char *name = cfg.name.c_str();
3104 
3105   // Read current smart data and check status/capability
3106   struct ata_smart_values data;
3107   if (ataReadSmartValues(device, &data) || !(data.offline_data_collection_capability)) {
3108     PrintOut(LOG_CRIT, "Device: %s, not capable of Offline or Self-Testing.\n", name);
3109     return 1;
3110   }
3111 
3112   // Check for capability to do the test
3113   int dotest = -1, mode = 0;
3114   const char *testname = 0;
3115   switch (testtype) {
3116   case 'O':
3117     testname="Offline Immediate ";
3118     if (isSupportExecuteOfflineImmediate(&data))
3119       dotest=OFFLINE_FULL_SCAN;
3120     else
3121       state.not_cap_offline = true;
3122     break;
3123   case 'C':
3124     testname="Conveyance Self-";
3125     if (isSupportConveyanceSelfTest(&data))
3126       dotest=CONVEYANCE_SELF_TEST;
3127     else
3128       state.not_cap_conveyance = true;
3129     break;
3130   case 'S':
3131     testname="Short Self-";
3132     if (isSupportSelfTest(&data))
3133       dotest=SHORT_SELF_TEST;
3134     else
3135       state.not_cap_short = true;
3136     break;
3137   case 'L':
3138     testname="Long Self-";
3139     if (isSupportSelfTest(&data))
3140       dotest=EXTEND_SELF_TEST;
3141     else
3142       state.not_cap_long = true;
3143     break;
3144 
3145   case 'c': case 'n': case 'r':
3146     testname = "Selective Self-";
3147     if (isSupportSelectiveSelfTest(&data)) {
3148       dotest = SELECTIVE_SELF_TEST;
3149       switch (testtype) {
3150         case 'c': mode = SEL_CONT; break;
3151         case 'n': mode = SEL_NEXT; break;
3152         case 'r': mode = SEL_REDO; break;
3153       }
3154     }
3155     else
3156       state.not_cap_selective = true;
3157     break;
3158   }
3159 
3160   // If we can't do the test, exit
3161   if (dotest<0) {
3162     PrintOut(LOG_CRIT, "Device: %s, not capable of %sTest\n", name, testname);
3163     return 1;
3164   }
3165 
3166   // If currently running a self-test, do not interrupt it to start another.
3167   if (15==(data.self_test_exec_status >> 4)) {
3168     if (cfg.firmwarebugs.is_set(BUG_SAMSUNG3) && data.self_test_exec_status == 0xf0) {
3169       PrintOut(LOG_INFO, "Device: %s, will not skip scheduled %sTest "
3170                "despite unclear Self-Test byte (SAMSUNG Firmware bug).\n", name, testname);
3171     } else {
3172       PrintOut(LOG_INFO, "Device: %s, skip scheduled %sTest; %1d0%% remaining of current Self-Test.\n",
3173                name, testname, (int)(data.self_test_exec_status & 0x0f));
3174       return 1;
3175     }
3176   }
3177 
3178   if (dotest == SELECTIVE_SELF_TEST) {
3179     // Set test span
3180     ata_selective_selftest_args selargs, prev_args;
3181     selargs.num_spans = 1;
3182     selargs.span[0].mode = mode;
3183     prev_args.num_spans = 1;
3184     prev_args.span[0].start = state.selective_test_last_start;
3185     prev_args.span[0].end   = state.selective_test_last_end;
3186     if (ataWriteSelectiveSelfTestLog(device, selargs, &data, state.num_sectors, &prev_args)) {
3187       PrintOut(LOG_CRIT, "Device: %s, prepare %sTest failed\n", name, testname);
3188       return 1;
3189     }
3190     uint64_t start = selargs.span[0].start, end = selargs.span[0].end;
3191     PrintOut(LOG_INFO, "Device: %s, %s test span at LBA %" PRIu64 " - %" PRIu64 " (%" PRIu64 " sectors, %u%% - %u%% of disk).\n",
3192       name, (selargs.span[0].mode == SEL_NEXT ? "next" : "redo"),
3193       start, end, end - start + 1,
3194       (unsigned)((100 * start + state.num_sectors/2) / state.num_sectors),
3195       (unsigned)((100 * end   + state.num_sectors/2) / state.num_sectors));
3196     state.selective_test_last_start = start;
3197     state.selective_test_last_end = end;
3198   }
3199 
3200   // execute the test, and return status
3201   int retval = smartcommandhandler(device, IMMEDIATE_OFFLINE, dotest, NULL);
3202   if (retval) {
3203     PrintOut(LOG_CRIT, "Device: %s, execute %sTest failed.\n", name, testname);
3204     return retval;
3205   }
3206 
3207   // Report recent test start to do_disable_standby_check()
3208   // and force log of next test status
3209   if (testtype == 'O')
3210     state.offline_started = true;
3211   else
3212     state.selftest_started = true;
3213 
3214   PrintOut(LOG_INFO, "Device: %s, starting scheduled %sTest.\n", name, testname);
3215   return 0;
3216 }
3217 
3218 // Check pending sector count attribute values (-C, -U directives).
check_pending(const dev_config & cfg,dev_state & state,unsigned char id,bool increase_only,const ata_smart_values & smartval,int mailtype,const char * msg)3219 static void check_pending(const dev_config & cfg, dev_state & state,
3220                           unsigned char id, bool increase_only,
3221                           const ata_smart_values & smartval,
3222                           int mailtype, const char * msg)
3223 {
3224   // Find attribute index
3225   int i = ata_find_attr_index(id, smartval);
3226   if (!(i >= 0 && ata_find_attr_index(id, state.smartval) == i))
3227     return;
3228 
3229   // No report if no sectors pending.
3230   uint64_t rawval = ata_get_attr_raw_value(smartval.vendor_attributes[i], cfg.attribute_defs);
3231   if (rawval == 0) {
3232     reset_warning_mail(cfg, state, mailtype, "No more %s", msg);
3233     return;
3234   }
3235 
3236   // If attribute is not reset, report only sector count increases.
3237   uint64_t prev_rawval = ata_get_attr_raw_value(state.smartval.vendor_attributes[i], cfg.attribute_defs);
3238   if (!(!increase_only || prev_rawval < rawval))
3239     return;
3240 
3241   // Format message.
3242   std::string s = strprintf("Device: %s, %" PRId64 " %s", cfg.name.c_str(), rawval, msg);
3243   if (prev_rawval > 0 && rawval != prev_rawval)
3244     s += strprintf(" (changed %+" PRId64 ")", rawval - prev_rawval);
3245 
3246   PrintOut(LOG_CRIT, "%s\n", s.c_str());
3247   MailWarning(cfg, state, mailtype, "%s", s.c_str());
3248   state.must_write = true;
3249 }
3250 
// Format a temperature value as decimal text.
// Returns the literal "??" if 'x' is zero (value not set),
// otherwise prints the number into 'buf' and returns 'buf'.
static const char * fmt_temp(unsigned char x, char (& buf)[20])
{
  if (x != 0) {
    snprintf(buf, sizeof(buf), "%u", x);
    return buf;
  }
  return "??";
}
3259 
3260 // Check Temperature limits
CheckTemperature(const dev_config & cfg,dev_state & state,unsigned char currtemp,unsigned char triptemp)3261 static void CheckTemperature(const dev_config & cfg, dev_state & state, unsigned char currtemp, unsigned char triptemp)
3262 {
3263   if (!(0 < currtemp && currtemp < 255)) {
3264     PrintOut(LOG_INFO, "Device: %s, failed to read Temperature\n", cfg.name.c_str());
3265     return;
3266   }
3267 
3268   // Update Max Temperature
3269   const char * minchg = "", * maxchg = "";
3270   if (currtemp > state.tempmax) {
3271     if (state.tempmax)
3272       maxchg = "!";
3273     state.tempmax = currtemp;
3274     state.must_write = true;
3275   }
3276 
3277   char buf[20];
3278   if (!state.temperature) {
3279     // First check
3280     if (!state.tempmin || currtemp < state.tempmin)
3281         // Delay Min Temperature update by ~ 30 minutes.
3282         state.tempmin_delay = time(0) + CHECKTIME - 60;
3283     PrintOut(LOG_INFO, "Device: %s, initial Temperature is %d Celsius (Min/Max %s/%u%s)\n",
3284       cfg.name.c_str(), (int)currtemp, fmt_temp(state.tempmin, buf), state.tempmax, maxchg);
3285     if (triptemp)
3286       PrintOut(LOG_INFO, "    [trip Temperature is %d Celsius]\n", (int)triptemp);
3287     state.temperature = currtemp;
3288   }
3289   else {
3290     if (state.tempmin_delay) {
3291       // End Min Temperature update delay if ...
3292       if (   (state.tempmin && currtemp > state.tempmin) // current temp exceeds recorded min,
3293           || (state.tempmin_delay <= time(0))) {         // or delay time is over.
3294         state.tempmin_delay = 0;
3295         if (!state.tempmin)
3296           state.tempmin = 255;
3297       }
3298     }
3299 
3300     // Update Min Temperature
3301     if (!state.tempmin_delay && currtemp < state.tempmin) {
3302       state.tempmin = currtemp;
3303       state.must_write = true;
3304       if (currtemp != state.temperature)
3305         minchg = "!";
3306     }
3307 
3308     // Track changes
3309     if (cfg.tempdiff && (*minchg || *maxchg || abs((int)currtemp - (int)state.temperature) >= cfg.tempdiff)) {
3310       PrintOut(LOG_INFO, "Device: %s, Temperature changed %+d Celsius to %u Celsius (Min/Max %s%s/%u%s)\n",
3311         cfg.name.c_str(), (int)currtemp-(int)state.temperature, currtemp, fmt_temp(state.tempmin, buf), minchg, state.tempmax, maxchg);
3312       state.temperature = currtemp;
3313     }
3314   }
3315 
3316   // Check limits
3317   if (cfg.tempcrit && currtemp >= cfg.tempcrit) {
3318     PrintOut(LOG_CRIT, "Device: %s, Temperature %u Celsius reached critical limit of %u Celsius (Min/Max %s%s/%u%s)\n",
3319       cfg.name.c_str(), currtemp, cfg.tempcrit, fmt_temp(state.tempmin, buf), minchg, state.tempmax, maxchg);
3320     MailWarning(cfg, state, 12, "Device: %s, Temperature %d Celsius reached critical limit of %u Celsius (Min/Max %s%s/%u%s)",
3321       cfg.name.c_str(), currtemp, cfg.tempcrit, fmt_temp(state.tempmin, buf), minchg, state.tempmax, maxchg);
3322   }
3323   else if (cfg.tempinfo && currtemp >= cfg.tempinfo) {
3324     PrintOut(LOG_INFO, "Device: %s, Temperature %u Celsius reached limit of %u Celsius (Min/Max %s%s/%u%s)\n",
3325       cfg.name.c_str(), currtemp, cfg.tempinfo, fmt_temp(state.tempmin, buf), minchg, state.tempmax, maxchg);
3326   }
3327   else if (cfg.tempcrit) {
3328     unsigned char limit = (cfg.tempinfo ? cfg.tempinfo : cfg.tempcrit-5);
3329     if (currtemp < limit)
3330       reset_warning_mail(cfg, state, 12, "Temperature %u Celsius dropped below %u Celsius", currtemp, limit);
3331   }
3332 }
3333 
3334 // Check normalized and raw attribute values.
check_attribute(const dev_config & cfg,dev_state & state,const ata_smart_attribute & attr,const ata_smart_attribute & prev,int attridx,const ata_smart_threshold_entry * thresholds)3335 static void check_attribute(const dev_config & cfg, dev_state & state,
3336                             const ata_smart_attribute & attr,
3337                             const ata_smart_attribute & prev,
3338                             int attridx,
3339                             const ata_smart_threshold_entry * thresholds)
3340 {
3341   // Check attribute and threshold
3342   ata_attr_state attrstate = ata_get_attr_state(attr, attridx, thresholds, cfg.attribute_defs);
3343   if (attrstate == ATTRSTATE_NON_EXISTING)
3344     return;
3345 
3346   // If requested, check for usage attributes that have failed.
3347   if (   cfg.usagefailed && attrstate == ATTRSTATE_FAILED_NOW
3348       && !cfg.monitor_attr_flags.is_set(attr.id, MONITOR_IGN_FAILUSE)) {
3349     std::string attrname = ata_get_smart_attr_name(attr.id, cfg.attribute_defs, cfg.dev_rpm);
3350     PrintOut(LOG_CRIT, "Device: %s, Failed SMART usage Attribute: %d %s.\n", cfg.name.c_str(), attr.id, attrname.c_str());
3351     MailWarning(cfg, state, 2, "Device: %s, Failed SMART usage Attribute: %d %s.", cfg.name.c_str(), attr.id, attrname.c_str());
3352     state.must_write = true;
3353   }
3354 
3355   // Return if we're not tracking this type of attribute
3356   bool prefail = !!ATTRIBUTE_FLAGS_PREFAILURE(attr.flags);
3357   if (!(   ( prefail && cfg.prefail)
3358         || (!prefail && cfg.usage  )))
3359     return;
3360 
3361   // Return if '-I ID' was specified
3362   if (cfg.monitor_attr_flags.is_set(attr.id, MONITOR_IGNORE))
3363     return;
3364 
3365   // Issue warning if they don't have the same ID in all structures.
3366   if (attr.id != prev.id) {
3367     PrintOut(LOG_INFO,"Device: %s, same Attribute has different ID numbers: %d = %d\n",
3368              cfg.name.c_str(), attr.id, prev.id);
3369     return;
3370   }
3371 
3372   // Compare normalized values if valid.
3373   bool valchanged = false;
3374   if (attrstate > ATTRSTATE_NO_NORMVAL) {
3375     if (attr.current != prev.current)
3376       valchanged = true;
3377   }
3378 
3379   // Compare raw values if requested.
3380   bool rawchanged = false;
3381   if (cfg.monitor_attr_flags.is_set(attr.id, MONITOR_RAW)) {
3382     if (   ata_get_attr_raw_value(attr, cfg.attribute_defs)
3383         != ata_get_attr_raw_value(prev, cfg.attribute_defs))
3384       rawchanged = true;
3385   }
3386 
3387   // Return if no change
3388   if (!(valchanged || rawchanged))
3389     return;
3390 
3391   // Format value strings
3392   std::string currstr, prevstr;
3393   if (attrstate == ATTRSTATE_NO_NORMVAL) {
3394     // Print raw values only
3395     currstr = strprintf("%s (Raw)",
3396       ata_format_attr_raw_value(attr, cfg.attribute_defs).c_str());
3397     prevstr = strprintf("%s (Raw)",
3398       ata_format_attr_raw_value(prev, cfg.attribute_defs).c_str());
3399   }
3400   else if (cfg.monitor_attr_flags.is_set(attr.id, MONITOR_RAW_PRINT)) {
3401     // Print normalized and raw values
3402     currstr = strprintf("%d [Raw %s]", attr.current,
3403       ata_format_attr_raw_value(attr, cfg.attribute_defs).c_str());
3404     prevstr = strprintf("%d [Raw %s]", prev.current,
3405       ata_format_attr_raw_value(prev, cfg.attribute_defs).c_str());
3406   }
3407   else {
3408     // Print normalized values only
3409     currstr = strprintf("%d", attr.current);
3410     prevstr = strprintf("%d", prev.current);
3411   }
3412 
3413   // Format message
3414   std::string msg = strprintf("Device: %s, SMART %s Attribute: %d %s changed from %s to %s",
3415                               cfg.name.c_str(), (prefail ? "Prefailure" : "Usage"), attr.id,
3416                               ata_get_smart_attr_name(attr.id, cfg.attribute_defs, cfg.dev_rpm).c_str(),
3417                               prevstr.c_str(), currstr.c_str());
3418 
3419   // Report this change as critical ?
3420   if (   (valchanged && cfg.monitor_attr_flags.is_set(attr.id, MONITOR_AS_CRIT))
3421       || (rawchanged && cfg.monitor_attr_flags.is_set(attr.id, MONITOR_RAW_AS_CRIT))) {
3422     PrintOut(LOG_CRIT, "%s\n", msg.c_str());
3423     MailWarning(cfg, state, 2, "%s", msg.c_str());
3424   }
3425   else {
3426     PrintOut(LOG_INFO, "%s\n", msg.c_str());
3427   }
3428   state.must_write = true;
3429 }
3430 
3431 
ATACheckDevice(const dev_config & cfg,dev_state & state,ata_device * atadev,bool firstpass,bool allow_selftests)3432 static int ATACheckDevice(const dev_config & cfg, dev_state & state, ata_device * atadev,
3433                           bool firstpass, bool allow_selftests)
3434 {
3435   if (!open_device(cfg, state, atadev, "ATA"))
3436     return 1;
3437 
3438   const char * name = cfg.name.c_str();
3439 
3440   // user may have requested (with the -n Directive) to leave the disk
3441   // alone if it is in idle or sleeping mode.  In this case check the
3442   // power mode and exit without check if needed
3443   if (cfg.powermode && !state.powermodefail) {
3444     int dontcheck=0, powermode=ataCheckPowerMode(atadev);
3445     const char * mode = 0;
3446     if (0 <= powermode && powermode < 0xff) {
3447       // wait for possible spin up and check again
3448       int powermode2;
3449       sleep(5);
3450       powermode2 = ataCheckPowerMode(atadev);
3451       if (powermode2 > powermode)
3452         PrintOut(LOG_INFO, "Device: %s, CHECK POWER STATUS spins up disk (0x%02x -> 0x%02x)\n", name, powermode, powermode2);
3453       powermode = powermode2;
3454     }
3455 
3456     switch (powermode){
3457     case -1:
3458       // SLEEP
3459       mode="SLEEP";
3460       if (cfg.powermode>=1)
3461         dontcheck=1;
3462       break;
3463     case 0x00:
3464       // STANDBY
3465       mode="STANDBY";
3466       if (cfg.powermode>=2)
3467         dontcheck=1;
3468       break;
3469     case 0x01:
3470       // STANDBY_Y
3471       mode="STANDBY_Y";
3472       if (cfg.powermode>=2)
3473         dontcheck=1;
3474       break;
3475     case 0x80:
3476       // IDLE
3477       mode="IDLE";
3478       if (cfg.powermode>=3)
3479         dontcheck=1;
3480       break;
3481     case 0x81:
3482       // IDLE_A
3483       mode="IDLE_A";
3484       if (cfg.powermode>=3)
3485         dontcheck=1;
3486       break;
3487     case 0x82:
3488       // IDLE_B
3489       mode="IDLE_B";
3490       if (cfg.powermode>=3)
3491         dontcheck=1;
3492       break;
3493     case 0x83:
3494       // IDLE_C
3495       mode="IDLE_C";
3496       if (cfg.powermode>=3)
3497         dontcheck=1;
3498       break;
3499     case 0xff:
3500       // ACTIVE/IDLE
3501     case 0x40:
3502       // ACTIVE
3503     case 0x41:
3504       // ACTIVE
3505       mode="ACTIVE or IDLE";
3506       break;
3507     default:
3508       // UNKNOWN
3509       PrintOut(LOG_CRIT, "Device: %s, CHECK POWER STATUS returned %d, not ATA compliant, ignoring -n Directive\n",
3510         name, powermode);
3511       state.powermodefail = true;
3512       break;
3513     }
3514 
3515     // if we are going to skip a check, return now
3516     if (dontcheck){
3517       // skip at most powerskipmax checks
3518       if (!cfg.powerskipmax || state.powerskipcnt<cfg.powerskipmax) {
3519         CloseDevice(atadev, name);
3520         // report first only except if state has changed, avoid waking up system disk
3521         if ((!state.powerskipcnt || state.lastpowermodeskipped != powermode) && !cfg.powerquiet) {
3522           PrintOut(LOG_INFO, "Device: %s, is in %s mode, suspending checks\n", name, mode);
3523           state.lastpowermodeskipped = powermode;
3524         }
3525         state.powerskipcnt++;
3526         return 0;
3527       }
3528       else {
3529         PrintOut(LOG_INFO, "Device: %s, %s mode ignored due to reached limit of skipped checks (%d check%s skipped)\n",
3530           name, mode, state.powerskipcnt, (state.powerskipcnt==1?"":"s"));
3531       }
3532       state.powerskipcnt = 0;
3533       state.tempmin_delay = time(0) + CHECKTIME - 60; // Delay Min Temperature update
3534     }
3535     else if (state.powerskipcnt) {
3536       PrintOut(LOG_INFO, "Device: %s, is back in %s mode, resuming checks (%d check%s skipped)\n",
3537         name, mode, state.powerskipcnt, (state.powerskipcnt==1?"":"s"));
3538       state.powerskipcnt = 0;
3539       state.tempmin_delay = time(0) + CHECKTIME - 60; // Delay Min Temperature update
3540     }
3541   }
3542 
3543   // check smart status
3544   if (cfg.smartcheck) {
3545     int status=ataSmartStatus2(atadev);
3546     if (status==-1){
3547       PrintOut(LOG_INFO,"Device: %s, not capable of SMART self-check\n",name);
3548       MailWarning(cfg, state, 5, "Device: %s, not capable of SMART self-check", name);
3549       state.must_write = true;
3550     }
3551     else if (status==1){
3552       PrintOut(LOG_CRIT, "Device: %s, FAILED SMART self-check. BACK UP DATA NOW!\n", name);
3553       MailWarning(cfg, state, 1, "Device: %s, FAILED SMART self-check. BACK UP DATA NOW!", name);
3554       state.must_write = true;
3555     }
3556   }
3557 
3558   // Check everything that depends upon SMART Data (eg, Attribute values)
3559   if (   cfg.usagefailed || cfg.prefail || cfg.usage
3560       || cfg.curr_pending_id || cfg.offl_pending_id
3561       || cfg.tempdiff || cfg.tempinfo || cfg.tempcrit
3562       || cfg.selftest ||  cfg.offlinests || cfg.selfteststs) {
3563 
3564     // Read current attribute values.
3565     ata_smart_values curval;
3566     if (ataReadSmartValues(atadev, &curval)){
3567       PrintOut(LOG_CRIT, "Device: %s, failed to read SMART Attribute Data\n", name);
3568       MailWarning(cfg, state, 6, "Device: %s, failed to read SMART Attribute Data", name);
3569       state.must_write = true;
3570     }
3571     else {
3572       reset_warning_mail(cfg, state, 6, "read SMART Attribute Data worked again");
3573 
3574       // look for current or offline pending sectors
3575       if (cfg.curr_pending_id)
3576         check_pending(cfg, state, cfg.curr_pending_id, cfg.curr_pending_incr, curval, 10,
3577                       (!cfg.curr_pending_incr ? "Currently unreadable (pending) sectors"
3578                                               : "Total unreadable (pending) sectors"    ));
3579 
3580       if (cfg.offl_pending_id)
3581         check_pending(cfg, state, cfg.offl_pending_id, cfg.offl_pending_incr, curval, 11,
3582                       (!cfg.offl_pending_incr ? "Offline uncorrectable sectors"
3583                                               : "Total offline uncorrectable sectors"));
3584 
3585       // check temperature limits
3586       if (cfg.tempdiff || cfg.tempinfo || cfg.tempcrit)
3587         CheckTemperature(cfg, state, ata_return_temperature_value(&curval, cfg.attribute_defs), 0);
3588 
3589       // look for failed usage attributes, or track usage or prefail attributes
3590       if (cfg.usagefailed || cfg.prefail || cfg.usage) {
3591         for (int i = 0; i < NUMBER_ATA_SMART_ATTRIBUTES; i++) {
3592           check_attribute(cfg, state,
3593                           curval.vendor_attributes[i],
3594                           state.smartval.vendor_attributes[i],
3595                           i, state.smartthres.thres_entries);
3596         }
3597       }
3598 
3599       // Log changes of offline data collection status
3600       if (cfg.offlinests) {
3601         if (   curval.offline_data_collection_status
3602                 != state.smartval.offline_data_collection_status
3603             || state.offline_started // test was started in previous call
3604             || (firstpass && (debugmode || (curval.offline_data_collection_status & 0x7d))))
3605           log_offline_data_coll_status(name, curval.offline_data_collection_status);
3606       }
3607 
3608       // Log changes of self-test execution status
3609       if (cfg.selfteststs) {
3610         if (   curval.self_test_exec_status != state.smartval.self_test_exec_status
3611             || state.selftest_started // test was started in previous call
3612             || (firstpass && (debugmode || (curval.self_test_exec_status & 0xf0))))
3613           log_self_test_exec_status(name, curval.self_test_exec_status);
3614       }
3615 
3616       // Save the new values for the next time around
3617       state.smartval = curval;
3618     }
3619   }
3620   state.offline_started = state.selftest_started = false;
3621 
3622   // check if number of selftest errors has increased (note: may also DECREASE)
3623   if (cfg.selftest)
3624     CheckSelfTestLogs(cfg, state, SelfTestErrorCount(atadev, name, cfg.firmwarebugs));
3625 
3626   // check if number of ATA errors has increased
3627   if (cfg.errorlog || cfg.xerrorlog) {
3628 
3629     int errcnt1 = -1, errcnt2 = -1;
3630     if (cfg.errorlog)
3631       errcnt1 = read_ata_error_count(atadev, name, cfg.firmwarebugs, false);
3632     if (cfg.xerrorlog)
3633       errcnt2 = read_ata_error_count(atadev, name, cfg.firmwarebugs, true);
3634 
3635     // new number of errors is max of both logs
3636     int newc = (errcnt1 >= errcnt2 ? errcnt1 : errcnt2);
3637 
3638     // did command fail?
3639     if (newc<0)
3640       // lack of PrintOut here is INTENTIONAL
3641       MailWarning(cfg, state, 7, "Device: %s, Read SMART Error Log Failed", name);
3642 
3643     // has error count increased?
3644     int oldc = state.ataerrorcount;
3645     if (newc>oldc){
3646       PrintOut(LOG_CRIT, "Device: %s, ATA error count increased from %d to %d\n",
3647                name, oldc, newc);
3648       MailWarning(cfg, state, 4, "Device: %s, ATA error count increased from %d to %d",
3649                    name, oldc, newc);
3650       state.must_write = true;
3651     }
3652 
3653     if (newc>=0)
3654       state.ataerrorcount=newc;
3655   }
3656 
3657   // if the user has asked, and device is capable (or we're not yet
3658   // sure) check whether a self test should be done now.
3659   if (allow_selftests && !cfg.test_regex.empty()) {
3660     char testtype = next_scheduled_test(cfg, state, false/*!scsi*/);
3661     if (testtype)
3662       DoATASelfTest(cfg, state, atadev, testtype);
3663   }
3664 
3665   // Don't leave device open -- the OS/user may want to access it
3666   // before the next smartd cycle!
3667   CloseDevice(atadev, name);
3668 
3669   // Copy ATA attribute values to persistent state
3670   state.update_persistent_state();
3671 
3672   state.attrlog_dirty = true;
3673   return 0;
3674 }
3675 
SCSICheckDevice(const dev_config & cfg,dev_state & state,scsi_device * scsidev,bool allow_selftests)3676 static int SCSICheckDevice(const dev_config & cfg, dev_state & state, scsi_device * scsidev, bool allow_selftests)
3677 {
3678   if (!open_device(cfg, state, scsidev, "SCSI"))
3679     return 1;
3680 
3681   const char * name = cfg.name.c_str();
3682 
3683   uint8_t asc = 0, ascq = 0;
3684   uint8_t currenttemp = 0, triptemp = 0;
3685   if (!state.SuppressReport) {
3686     if (scsiCheckIE(scsidev, state.SmartPageSupported, state.TempPageSupported,
3687                     &asc, &ascq, &currenttemp, &triptemp)) {
3688       PrintOut(LOG_INFO, "Device: %s, failed to read SMART values\n",
3689                name);
3690       MailWarning(cfg, state, 6, "Device: %s, failed to read SMART values", name);
3691       state.SuppressReport = 1;
3692     }
3693   }
3694   if (asc > 0) {
3695     const char * cp = scsiGetIEString(asc, ascq);
3696     if (cp) {
3697       PrintOut(LOG_CRIT, "Device: %s, SMART Failure: %s\n", name, cp);
3698       MailWarning(cfg, state, 1,"Device: %s, SMART Failure: %s", name, cp);
3699     } else if (asc == 4 && ascq == 9) {
3700       PrintOut(LOG_INFO,"Device: %s, self-test in progress\n", name);
3701     } else if (debugmode)
3702       PrintOut(LOG_INFO,"Device: %s, non-SMART asc,ascq: %d,%d\n",
3703                name, (int)asc, (int)ascq);
3704   } else if (debugmode)
3705     PrintOut(LOG_INFO,"Device: %s, SMART health: passed\n", name);
3706 
3707   // check temperature limits
3708   if (cfg.tempdiff || cfg.tempinfo || cfg.tempcrit)
3709     CheckTemperature(cfg, state, currenttemp, triptemp);
3710 
3711   // check if number of selftest errors has increased (note: may also DECREASE)
3712   if (cfg.selftest)
3713     CheckSelfTestLogs(cfg, state, scsiCountFailedSelfTests(scsidev, 0));
3714 
3715   if (allow_selftests && !cfg.test_regex.empty()) {
3716     char testtype = next_scheduled_test(cfg, state, true/*scsi*/);
3717     if (testtype)
3718       DoSCSISelfTest(cfg, state, scsidev, testtype);
3719   }
3720   if (!cfg.attrlog_file.empty()){
3721     // saving error counters to state
3722     uint8_t tBuf[252];
3723     if (state.ReadECounterPageSupported && (0 == scsiLogSense(scsidev,
3724       READ_ERROR_COUNTER_LPAGE, 0, tBuf, sizeof(tBuf), 0))) {
3725       scsiDecodeErrCounterPage(tBuf, &state.scsi_error_counters[0].errCounter);
3726       state.scsi_error_counters[0].found=1;
3727     }
3728     if (state.WriteECounterPageSupported && (0 == scsiLogSense(scsidev,
3729       WRITE_ERROR_COUNTER_LPAGE, 0, tBuf, sizeof(tBuf), 0))) {
3730       scsiDecodeErrCounterPage(tBuf, &state.scsi_error_counters[1].errCounter);
3731       state.scsi_error_counters[1].found=1;
3732     }
3733     if (state.VerifyECounterPageSupported && (0 == scsiLogSense(scsidev,
3734       VERIFY_ERROR_COUNTER_LPAGE, 0, tBuf, sizeof(tBuf), 0))) {
3735       scsiDecodeErrCounterPage(tBuf, &state.scsi_error_counters[2].errCounter);
3736       state.scsi_error_counters[2].found=1;
3737     }
3738     if (state.NonMediumErrorPageSupported && (0 == scsiLogSense(scsidev,
3739       NON_MEDIUM_ERROR_LPAGE, 0, tBuf, sizeof(tBuf), 0))) {
3740       scsiDecodeNonMediumErrPage(tBuf, &state.scsi_nonmedium_error.nme);
3741       state.scsi_nonmedium_error.found=1;
3742     }
3743     // store temperature if not done by CheckTemperature() above
3744     if (!(cfg.tempdiff || cfg.tempinfo || cfg.tempcrit))
3745       state.temperature = currenttemp;
3746   }
3747   CloseDevice(scsidev, name);
3748   state.attrlog_dirty = true;
3749   return 0;
3750 }
3751 
NVMeCheckDevice(const dev_config & cfg,dev_state & state,nvme_device * nvmedev)3752 static int NVMeCheckDevice(const dev_config & cfg, dev_state & state, nvme_device * nvmedev)
3753 {
3754   if (!open_device(cfg, state, nvmedev, "NVMe"))
3755     return 1;
3756 
3757   const char * name = cfg.name.c_str();
3758 
3759   // Read SMART/Health log
3760   nvme_smart_log smart_log;
3761   if (!nvme_read_smart_log(nvmedev, smart_log)) {
3762       PrintOut(LOG_INFO, "Device: %s, failed to read NVMe SMART/Health Information\n", name);
3763       MailWarning(cfg, state, 6, "Device: %s, failed to read NVMe SMART/Health Information", name);
3764       state.must_write = true;
3765       return 0;
3766   }
3767 
3768   // Check Critical Warning bits
3769   if (cfg.smartcheck && smart_log.critical_warning) {
3770     unsigned char w = smart_log.critical_warning;
3771     std::string msg;
3772     static const char * const wnames[] =
3773       {"LowSpare", "Temperature", "Reliability", "R/O", "VolMemBackup"};
3774 
3775     for (unsigned b = 0, cnt = 0; b < 8 ; b++) {
3776       if (!(w & (1 << b)))
3777         continue;
3778       if (cnt)
3779         msg += ", ";
3780       if (++cnt > 3) {
3781         msg += "..."; break;
3782       }
3783       if (b >= sizeof(wnames)/sizeof(wnames[0])) {
3784         msg += "*Unknown*"; break;
3785       }
3786       msg += wnames[b];
3787     }
3788 
3789     PrintOut(LOG_CRIT, "Device: %s, Critical Warning (0x%02x): %s\n", name, w, msg.c_str());
3790     MailWarning(cfg, state, 1, "Device: %s, Critical Warning (0x%02x): %s", name, w, msg.c_str());
3791     state.must_write = true;
3792   }
3793 
3794   // Check temperature limits
3795   if (cfg.tempdiff || cfg.tempinfo || cfg.tempcrit) {
3796     int k = nvme_get_max_temp_kelvin(smart_log);
3797     // Convert Kelvin to positive Celsius (TODO: Allow negative temperatures)
3798     int c = k - 273;
3799     if (c < 1)
3800       c = 1;
3801     else if (c > 0xff)
3802       c = 0xff;
3803     CheckTemperature(cfg, state, c, 0);
3804   }
3805 
3806   // Check if number of errors has increased
3807   if (cfg.errorlog || cfg.xerrorlog) {
3808     uint64_t oldcnt = state.nvme_err_log_entries;
3809     uint64_t newcnt = le128_to_uint64(smart_log.num_err_log_entries);
3810     if (newcnt > oldcnt) {
3811       PrintOut(LOG_CRIT, "Device: %s, number of Error Log entries increased from %" PRIu64 " to %" PRIu64 "\n",
3812                name, oldcnt, newcnt);
3813       MailWarning(cfg, state, 4, "Device: %s, number of Error Log entries increased from %" PRIu64 " to %" PRIu64,
3814                   name, oldcnt, newcnt);
3815       state.must_write = true;
3816     }
3817     state.nvme_err_log_entries = newcnt;
3818   }
3819 
3820   CloseDevice(nvmedev, name);
3821   state.attrlog_dirty = true;
3822   return 0;
3823 }
3824 
3825 // 0=not used, 1=not disabled, 2=disable rejected by OS, 3=disabled
3826 static int standby_disable_state = 0;
3827 
init_disable_standby_check(dev_config_vector & configs)3828 static void init_disable_standby_check(dev_config_vector & configs)
3829 {
3830   // Check for '-l offlinests,ns' or '-l selfteststs,ns' directives
3831   bool sts1 = false, sts2 = false;
3832   for (unsigned i = 0; i < configs.size() && !(sts1 || sts2); i++) {
3833     const dev_config & cfg = configs.at(i);
3834     if (cfg.offlinests_ns)
3835       sts1 = true;
3836     if (cfg.selfteststs_ns)
3837       sts2 = true;
3838   }
3839 
3840   // Check for support of disable auto standby
3841   // Reenable standby if smartd.conf was reread
3842   if (sts1 || sts2 || standby_disable_state == 3) {
3843    if (!smi()->disable_system_auto_standby(false)) {
3844       if (standby_disable_state == 3)
3845         PrintOut(LOG_CRIT, "System auto standby enable failed: %s\n", smi()->get_errmsg());
3846       if (sts1 || sts2) {
3847         PrintOut(LOG_INFO, "Disable auto standby not supported, ignoring ',ns' from %s%s%s\n",
3848           (sts1 ? "-l offlinests,ns" : ""), (sts1 && sts2 ? " and " : ""), (sts2 ? "-l selfteststs,ns" : ""));
3849         sts1 = sts2 = false;
3850       }
3851     }
3852   }
3853 
3854   standby_disable_state = (sts1 || sts2 ? 1 : 0);
3855 }
3856 
do_disable_standby_check(const dev_config_vector & configs,const dev_state_vector & states)3857 static void do_disable_standby_check(const dev_config_vector & configs, const dev_state_vector & states)
3858 {
3859   if (!standby_disable_state)
3860     return;
3861 
3862   // Check for just started or still running self-tests
3863   bool running = false;
3864   for (unsigned i = 0; i < configs.size() && !running; i++) {
3865     const dev_config & cfg = configs.at(i); const dev_state & state = states.at(i);
3866 
3867     if (   (   cfg.offlinests_ns
3868             && (state.offline_started ||
3869                 is_offl_coll_in_progress(state.smartval.offline_data_collection_status)))
3870         || (   cfg.selfteststs_ns
3871             && (state.selftest_started ||
3872                 is_self_test_in_progress(state.smartval.self_test_exec_status)))         )
3873       running = true;
3874     // state.offline/selftest_started will be reset after next logging of test status
3875   }
3876 
3877   // Disable/enable auto standby and log state changes
3878   if (!running) {
3879     if (standby_disable_state != 1) {
3880       if (!smi()->disable_system_auto_standby(false))
3881         PrintOut(LOG_CRIT, "Self-test(s) completed, system auto standby enable failed: %s\n",
3882                  smi()->get_errmsg());
3883       else
3884         PrintOut(LOG_INFO, "Self-test(s) completed, system auto standby enabled\n");
3885       standby_disable_state = 1;
3886     }
3887   }
3888   else if (!smi()->disable_system_auto_standby(true)) {
3889     if (standby_disable_state != 2) {
3890       PrintOut(LOG_INFO, "Self-test(s) in progress, system auto standby disable rejected: %s\n",
3891                smi()->get_errmsg());
3892       standby_disable_state = 2;
3893     }
3894   }
3895   else {
3896     if (standby_disable_state != 3) {
3897       PrintOut(LOG_INFO, "Self-test(s) in progress, system auto standby disabled\n");
3898       standby_disable_state = 3;
3899     }
3900   }
3901 }
3902 
3903 // Checks the SMART status of all ATA and SCSI devices
CheckDevicesOnce(const dev_config_vector & configs,dev_state_vector & states,smart_device_list & devices,bool firstpass,bool allow_selftests)3904 static void CheckDevicesOnce(const dev_config_vector & configs, dev_state_vector & states,
3905                              smart_device_list & devices, bool firstpass, bool allow_selftests)
3906 {
3907   for (unsigned i = 0; i < configs.size(); i++) {
3908     const dev_config & cfg = configs.at(i);
3909     dev_state & state = states.at(i);
3910     smart_device * dev = devices.at(i);
3911     if (dev->is_ata())
3912       ATACheckDevice(cfg, state, dev->to_ata(), firstpass, allow_selftests);
3913     else if (dev->is_scsi())
3914       SCSICheckDevice(cfg, state, dev->to_scsi(), allow_selftests);
3915     else if (dev->is_nvme())
3916       NVMeCheckDevice(cfg, state, dev->to_nvme());
3917   }
3918 
3919   do_disable_standby_check(configs, states);
3920 }
3921 
3922 // Install all signal handlers
install_signal_handlers()3923 static void install_signal_handlers()
3924 {
3925   // normal and abnormal exit
3926   set_signal_if_not_ignored(SIGTERM, sighandler);
3927   set_signal_if_not_ignored(SIGQUIT, sighandler);
3928 
3929   // in debug mode, <CONTROL-C> ==> HUP
3930   set_signal_if_not_ignored(SIGINT, (debugmode ? HUPhandler : sighandler));
3931 
3932   // Catch HUP and USR1
3933   set_signal_if_not_ignored(SIGHUP, HUPhandler);
3934   set_signal_if_not_ignored(SIGUSR1, USR1handler);
3935 #ifdef _WIN32
3936   set_signal_if_not_ignored(SIGUSR2, USR2handler);
3937 #endif
3938 }
3939 
3940 #ifdef _WIN32
3941 // Toggle debug mode implemented for native windows only
3942 // (there is no easy way to reopen tty on *nix)
ToggleDebugMode()3943 static void ToggleDebugMode()
3944 {
3945   if (!debugmode) {
3946     PrintOut(LOG_INFO,"Signal USR2 - enabling debug mode\n");
3947     if (!daemon_enable_console("smartd [Debug]")) {
3948       debugmode = 1;
3949       daemon_signal(SIGINT, HUPhandler);
3950       PrintOut(LOG_INFO,"smartd debug mode enabled, PID=%d\n", getpid());
3951     }
3952     else
3953       PrintOut(LOG_INFO,"enable console failed\n");
3954   }
3955   else if (debugmode == 1) {
3956     daemon_disable_console();
3957     debugmode = 0;
3958     daemon_signal(SIGINT, sighandler);
3959     PrintOut(LOG_INFO,"Signal USR2 - debug mode disabled\n");
3960   }
3961   else
3962     PrintOut(LOG_INFO,"Signal USR2 - debug mode %d not changed\n", debugmode);
3963 }
3964 #endif
3965 
dosleep(time_t wakeuptime,bool & sigwakeup,int numdev)3966 static time_t dosleep(time_t wakeuptime, bool & sigwakeup, int numdev)
3967 {
3968   // If past wake-up-time, compute next wake-up-time
3969   time_t timenow=time(NULL);
3970   while (wakeuptime<=timenow){
3971     time_t intervals = 1 + (timenow-wakeuptime)/checktime;
3972     wakeuptime+=intervals*checktime;
3973   }
3974 
3975   notify_wait(wakeuptime, numdev);
3976 
3977   // sleep until we catch SIGUSR1 or have completed sleeping
3978   int addtime = 0;
3979   while (timenow < wakeuptime+addtime && !caughtsigUSR1 && !caughtsigHUP && !caughtsigEXIT) {
3980 
3981     // protect user again system clock being adjusted backwards
3982     if (wakeuptime>timenow+checktime){
3983       PrintOut(LOG_INFO, "System clock time adjusted to the past. Resetting next wakeup time.\n");
3984       wakeuptime=timenow+checktime;
3985     }
3986 
3987     // Exit sleep when time interval has expired or a signal is received
3988     sleep(wakeuptime+addtime-timenow);
3989 
3990 #ifdef _WIN32
3991     // toggle debug mode?
3992     if (caughtsigUSR2) {
3993       ToggleDebugMode();
3994       caughtsigUSR2 = 0;
3995     }
3996 #endif
3997 
3998     timenow=time(NULL);
3999 
4000     // Actual sleep time too long?
4001     if (!addtime && timenow > wakeuptime+60) {
4002       if (debugmode)
4003         PrintOut(LOG_INFO, "Sleep time was %d seconds too long, assuming wakeup from standby mode.\n",
4004           (int)(timenow-wakeuptime));
4005       // Wait another 20 seconds to avoid I/O errors during disk spin-up
4006       addtime = timenow-wakeuptime+20;
4007       // Use next wake-up-time if close
4008       int nextcheck = checktime - addtime % checktime;
4009       if (nextcheck <= 20)
4010         addtime += nextcheck;
4011     }
4012   }
4013 
4014   // if we caught a SIGUSR1 then print message and clear signal
4015   if (caughtsigUSR1){
4016     PrintOut(LOG_INFO,"Signal USR1 - checking devices now rather than in %d seconds.\n",
4017              wakeuptime-timenow>0?(int)(wakeuptime-timenow):0);
4018     caughtsigUSR1=0;
4019     sigwakeup = true;
4020   }
4021 
4022   // return adjusted wakeuptime
4023   return wakeuptime;
4024 }
4025 
4026 // Print out a list of valid arguments for the Directive d
printoutvaliddirectiveargs(int priority,char d)4027 static void printoutvaliddirectiveargs(int priority, char d)
4028 {
4029   switch (d) {
4030   case 'n':
4031     PrintOut(priority, "never[,N][,q], sleep[,N][,q], standby[,N][,q], idle[,N][,q]");
4032     break;
4033   case 's':
4034     PrintOut(priority, "valid_regular_expression");
4035     break;
4036   case 'd':
4037     PrintOut(priority, "%s", smi()->get_valid_dev_types_str().c_str());
4038     break;
4039   case 'T':
4040     PrintOut(priority, "normal, permissive");
4041     break;
4042   case 'o':
4043   case 'S':
4044     PrintOut(priority, "on, off");
4045     break;
4046   case 'l':
4047     PrintOut(priority, "error, selftest");
4048     break;
4049   case 'M':
4050     PrintOut(priority, "\"once\", \"daily\", \"diminishing\", \"test\", \"exec\"");
4051     break;
4052   case 'v':
4053     PrintOut(priority, "\n%s\n", create_vendor_attribute_arg_list().c_str());
4054     break;
4055   case 'P':
4056     PrintOut(priority, "use, ignore, show, showall");
4057     break;
4058   case 'F':
4059     PrintOut(priority, "%s", get_valid_firmwarebug_args());
4060     break;
4061   case 'e':
4062     PrintOut(priority, "aam,[N|off], apm,[N|off], lookahead,[on|off], dsn,[on|off] "
4063                        "security-freeze, standby,[N|off], wcache,[on|off]");
4064     break;
4065   }
4066 }
4067 
4068 // exits with an error message, or returns integer value of token
GetInteger(const char * arg,const char * name,const char * token,int lineno,const char * cfgfile,int min,int max,char * suffix=0)4069 static int GetInteger(const char *arg, const char *name, const char *token, int lineno, const char *cfgfile,
4070                int min, int max, char * suffix = 0)
4071 {
4072   // make sure argument is there
4073   if (!arg) {
4074     PrintOut(LOG_CRIT,"File %s line %d (drive %s): Directive: %s takes integer argument from %d to %d.\n",
4075              cfgfile, lineno, name, token, min, max);
4076     return -1;
4077   }
4078 
4079   // get argument value (base 10), check that it's integer, and in-range
4080   char *endptr;
4081   int val = strtol(arg,&endptr,10);
4082 
4083   // optional suffix present?
4084   if (suffix) {
4085     if (!strcmp(endptr, suffix))
4086       endptr += strlen(suffix);
4087     else
4088       *suffix = 0;
4089   }
4090 
4091   if (!(!*endptr && min <= val && val <= max)) {
4092     PrintOut(LOG_CRIT,"File %s line %d (drive %s): Directive: %s has argument: %s; needs integer from %d to %d.\n",
4093              cfgfile, lineno, name, token, arg, min, max);
4094     return -1;
4095   }
4096 
4097   // all is well; return value
4098   return val;
4099 }
4100 
4101 
4102 // Get 1-3 small integer(s) for '-W' directive
Get3Integers(const char * arg,const char * name,const char * token,int lineno,const char * cfgfile,unsigned char * val1,unsigned char * val2,unsigned char * val3)4103 static int Get3Integers(const char *arg, const char *name, const char *token, int lineno, const char *cfgfile,
4104                  unsigned char *val1, unsigned char *val2, unsigned char *val3)
4105 {
4106   unsigned v1 = 0, v2 = 0, v3 = 0;
4107   int n1 = -1, n2 = -1, n3 = -1, len;
4108   if (!arg) {
4109     PrintOut(LOG_CRIT,"File %s line %d (drive %s): Directive: %s takes 1-3 integer argument(s) from 0 to 255.\n",
4110              cfgfile, lineno, name, token);
4111     return -1;
4112   }
4113 
4114   len = strlen(arg);
4115   if (!(   sscanf(arg, "%u%n,%u%n,%u%n", &v1, &n1, &v2, &n2, &v3, &n3) >= 1
4116         && (n1 == len || n2 == len || n3 == len) && v1 <= 255 && v2 <= 255 && v3 <= 255)) {
4117     PrintOut(LOG_CRIT,"File %s line %d (drive %s): Directive: %s has argument: %s; needs 1-3 integer(s) from 0 to 255.\n",
4118              cfgfile, lineno, name, token, arg);
4119     return -1;
4120   }
4121   *val1 = (unsigned char)v1; *val2 = (unsigned char)v2; *val3 = (unsigned char)v3;
4122   return 0;
4123 }
4124 
4125 
4126 #ifdef _WIN32
4127 
// Get next token from the current strtok() sequence.  If the token starts
// with '"', concatenate this and the following tokens up to the one which
// ends with '"' and return the result without the quotes.
//
// Returns:
// - 0 if no token is left or the quoted string is empty (""),
// - the string "\"" (a single quote character) if the closing quote is
//   missing,
// - else the (dequoted) token.  A dequoted token is stored in a static
//   buffer which is overwritten by the next call.
//
// Note: The tokens are joined with a single space, so the original
// delimiters inside the quotes (e.g. multiple spaces) are not preserved.
static const char * strtok_dequote(const char * delimiters)
{
  const char * t = strtok(0, delimiters);
  if (!t || t[0] != '"')
    return t;

  static std::string token;
  token = t+1;

  // Closing quote already in the first token (quoted string without
  // delimiters)?  Without this check such a string would be reported as
  // unterminated or would swallow the tokens that follow.
  if (!token.empty() && token[token.size()-1] == '"') {
    token.erase(token.size()-1);
    return (token.empty() ? 0 : token.c_str());
  }

  for (;;) {
    t = strtok(0, delimiters);
    if (!t || !*t)
      return "\""; // missing closing quote
    token += ' ';
    int len = strlen(t);
    if (t[len-1] == '"') {
      token += std::string(t, len-1);
      break;
    }
    token += t;
  }
  return token.c_str();
}
4151 
4152 #endif // _WIN32
4153 
4154 
4155 // This function returns 1 if it has correctly parsed one token (and
4156 // any arguments), else zero if no tokens remain.  It returns -1 if an
4157 // error was encountered.
ParseToken(char * token,dev_config & cfg,smart_devtype_list & scan_types)4158 static int ParseToken(char * token, dev_config & cfg, smart_devtype_list & scan_types)
4159 {
4160   char sym;
4161   const char * name = cfg.name.c_str();
4162   int lineno=cfg.lineno;
4163   const char *delim = " \n\t";
4164   int badarg = 0;
4165   int missingarg = 0;
4166   const char *arg = 0;
4167 
4168   // is the rest of the line a comment
4169   if (*token=='#')
4170     return 1;
4171 
4172   // is the token not recognized?
4173   if (*token!='-' || strlen(token)!=2) {
4174     PrintOut(LOG_CRIT,"File %s line %d (drive %s): unknown Directive: %s\n",
4175              configfile, lineno, name, token);
4176     PrintOut(LOG_CRIT, "Run smartd -D to print a list of valid Directives.\n");
4177     return -1;
4178   }
4179 
4180   // token we will be parsing:
4181   sym=token[1];
4182 
4183   // parse the token and swallow its argument
4184   int val;
4185   char plus[] = "+", excl[] = "!";
4186 
4187   switch (sym) {
4188   case 'C':
4189     // monitor current pending sector count (default 197)
4190     if ((val = GetInteger(arg=strtok(NULL,delim), name, token, lineno, configfile, 0, 255, plus)) < 0)
4191       return -1;
4192     cfg.curr_pending_id = (unsigned char)val;
4193     cfg.curr_pending_incr = (*plus == '+');
4194     cfg.curr_pending_set = true;
4195     break;
4196   case 'U':
4197     // monitor offline uncorrectable sectors (default 198)
4198     if ((val = GetInteger(arg=strtok(NULL,delim), name, token, lineno, configfile, 0, 255, plus)) < 0)
4199       return -1;
4200     cfg.offl_pending_id = (unsigned char)val;
4201     cfg.offl_pending_incr = (*plus == '+');
4202     cfg.offl_pending_set = true;
4203     break;
4204   case 'T':
4205     // Set tolerance level for SMART command failures
4206     if ((arg = strtok(NULL, delim)) == NULL) {
4207       missingarg = 1;
4208     } else if (!strcmp(arg, "normal")) {
4209       // Normal mode: exit on failure of a mandatory S.M.A.R.T. command, but
4210       // not on failure of an optional S.M.A.R.T. command.
4211       // This is the default so we don't need to actually do anything here.
4212       cfg.permissive = false;
4213     } else if (!strcmp(arg, "permissive")) {
4214       // Permissive mode; ignore errors from Mandatory SMART commands
4215       cfg.permissive = true;
4216     } else {
4217       badarg = 1;
4218     }
4219     break;
4220   case 'd':
4221     // specify the device type
4222     if ((arg = strtok(NULL, delim)) == NULL) {
4223       missingarg = 1;
4224     } else if (!strcmp(arg, "ignore")) {
4225       cfg.ignore = true;
4226     } else if (!strcmp(arg, "removable")) {
4227       cfg.removable = true;
4228     } else if (!strcmp(arg, "auto")) {
4229       cfg.dev_type = "";
4230       scan_types.clear();
4231     } else {
4232       cfg.dev_type = arg;
4233       scan_types.push_back(arg);
4234     }
4235     break;
4236   case 'F':
4237     // fix firmware bug
4238     if (!(arg = strtok(0, delim)))
4239       missingarg = 1;
4240     else if (!parse_firmwarebug_def(arg, cfg.firmwarebugs))
4241       badarg = 1;
4242     break;
4243   case 'H':
4244     // check SMART status
4245     cfg.smartcheck = true;
4246     break;
4247   case 'f':
4248     // check for failure of usage attributes
4249     cfg.usagefailed = true;
4250     break;
4251   case 't':
4252     // track changes in all vendor attributes
4253     cfg.prefail = true;
4254     cfg.usage = true;
4255     break;
4256   case 'p':
4257     // track changes in prefail vendor attributes
4258     cfg.prefail = true;
4259     break;
4260   case 'u':
4261     //  track changes in usage vendor attributes
4262     cfg.usage = true;
4263     break;
4264   case 'l':
4265     // track changes in SMART logs
4266     if ((arg = strtok(NULL, delim)) == NULL) {
4267       missingarg = 1;
4268     } else if (!strcmp(arg, "selftest")) {
4269       // track changes in self-test log
4270       cfg.selftest = true;
4271     } else if (!strcmp(arg, "error")) {
4272       // track changes in ATA error log
4273       cfg.errorlog = true;
4274     } else if (!strcmp(arg, "xerror")) {
4275       // track changes in Extended Comprehensive SMART error log
4276       cfg.xerrorlog = true;
4277     } else if (!strcmp(arg, "offlinests")) {
4278       // track changes in offline data collection status
4279       cfg.offlinests = true;
4280     } else if (!strcmp(arg, "offlinests,ns")) {
4281       // track changes in offline data collection status, disable auto standby
4282       cfg.offlinests = cfg.offlinests_ns = true;
4283     } else if (!strcmp(arg, "selfteststs")) {
4284       // track changes in self-test execution status
4285       cfg.selfteststs = true;
4286     } else if (!strcmp(arg, "selfteststs,ns")) {
4287       // track changes in self-test execution status, disable auto standby
4288       cfg.selfteststs = cfg.selfteststs_ns = true;
4289     } else if (!strncmp(arg, "scterc,", sizeof("scterc,")-1)) {
4290         // set SCT Error Recovery Control
4291         unsigned rt = ~0, wt = ~0; int nc = -1;
4292         sscanf(arg,"scterc,%u,%u%n", &rt, &wt, &nc);
4293         if (nc == (int)strlen(arg) && rt <= 999 && wt <= 999) {
4294           cfg.sct_erc_set = true;
4295           cfg.sct_erc_readtime = rt;
4296           cfg.sct_erc_writetime = wt;
4297         }
4298         else
4299           badarg = 1;
4300     } else {
4301       badarg = 1;
4302     }
4303     break;
4304   case 'a':
4305     // monitor everything
4306     cfg.smartcheck = true;
4307     cfg.prefail = true;
4308     cfg.usagefailed = true;
4309     cfg.usage = true;
4310     cfg.selftest = true;
4311     cfg.errorlog = true;
4312     cfg.selfteststs = true;
4313     break;
4314   case 'o':
4315     // automatic offline testing enable/disable
4316     if ((arg = strtok(NULL, delim)) == NULL) {
4317       missingarg = 1;
4318     } else if (!strcmp(arg, "on")) {
4319       cfg.autoofflinetest = 2;
4320     } else if (!strcmp(arg, "off")) {
4321       cfg.autoofflinetest = 1;
4322     } else {
4323       badarg = 1;
4324     }
4325     break;
4326   case 'n':
4327     // skip disk check if in idle or standby mode
4328     if (!(arg = strtok(NULL, delim)))
4329       missingarg = 1;
4330     else {
4331       char *endptr = NULL;
4332       char *next = strchr(const_cast<char*>(arg), ',');
4333 
4334       cfg.powerquiet = false;
4335       cfg.powerskipmax = 0;
4336 
4337       if (next!=NULL) *next='\0';
4338       if (!strcmp(arg, "never"))
4339         cfg.powermode = 0;
4340       else if (!strcmp(arg, "sleep"))
4341         cfg.powermode = 1;
4342       else if (!strcmp(arg, "standby"))
4343         cfg.powermode = 2;
4344       else if (!strcmp(arg, "idle"))
4345         cfg.powermode = 3;
4346       else
4347         badarg = 1;
4348 
4349       // if optional arguments are present
4350       if (!badarg && next!=NULL) {
4351         next++;
4352         cfg.powerskipmax = strtol(next, &endptr, 10);
4353         if (endptr == next)
4354           cfg.powerskipmax = 0;
4355         else {
4356           next = endptr + (*endptr != '\0');
4357           if (cfg.powerskipmax <= 0)
4358             badarg = 1;
4359         }
4360         if (*next != '\0') {
4361           if (!strcmp("q", next))
4362             cfg.powerquiet = true;
4363           else {
4364             badarg = 1;
4365           }
4366         }
4367       }
4368     }
4369     break;
4370   case 'S':
4371     // automatic attribute autosave enable/disable
4372     if ((arg = strtok(NULL, delim)) == NULL) {
4373       missingarg = 1;
4374     } else if (!strcmp(arg, "on")) {
4375       cfg.autosave = 2;
4376     } else if (!strcmp(arg, "off")) {
4377       cfg.autosave = 1;
4378     } else {
4379       badarg = 1;
4380     }
4381     break;
4382   case 's':
4383     // warn user, and delete any previously given -s REGEXP Directives
4384     if (!cfg.test_regex.empty()){
4385       PrintOut(LOG_INFO, "File %s line %d (drive %s): ignoring previous Test Directive -s %s\n",
4386                configfile, lineno, name, cfg.test_regex.get_pattern());
4387       cfg.test_regex = regular_expression();
4388     }
4389     // check for missing argument
4390     if (!(arg = strtok(NULL, delim))) {
4391       missingarg = 1;
4392     }
4393     // Compile regex
4394     else {
4395       if (!cfg.test_regex.compile(arg)) {
4396         // not a valid regular expression!
4397         PrintOut(LOG_CRIT, "File %s line %d (drive %s): -s argument \"%s\" is INVALID extended regular expression. %s.\n",
4398                  configfile, lineno, name, arg, cfg.test_regex.get_errmsg());
4399         return -1;
4400       }
4401       // Do a bit of sanity checking and warn user if we think that
4402       // their regexp is "strange". User probably confused about shell
4403       // glob(3) syntax versus regular expression syntax regexp(7).
4404       // Check also for possible invalid number of digits in ':NNN[-LLL]' suffix.
4405       static const regular_expression syntax_check(
4406         "[^]$()*+./:?^[|0-9LSCOncr-]+|"
4407         ":[0-9]{0,2}($|[^0-9])|:[0-9]{4,}|"
4408         ":[0-9]{3}-(000|[0-9]{0,2}($|[^0-9])|[0-9]{4,})"
4409       );
4410       regular_expression::match_range range;
4411       if (syntax_check.execute(arg, 1, &range) && 0 <= range.rm_so && range.rm_so < range.rm_eo)
4412         PrintOut(LOG_INFO,  "File %s line %d (drive %s): warning, \"%.*s\" looks odd in "
4413                             "extended regular expression \"%s\"\n",
4414                  configfile, lineno, name, (int)(range.rm_eo - range.rm_so), arg + range.rm_so, arg);
4415     }
4416     break;
4417   case 'm':
4418     // send email to address that follows
4419     if (!(arg = strtok(NULL,delim)))
4420       missingarg = 1;
4421     else {
4422       if (!cfg.emailaddress.empty())
4423         PrintOut(LOG_INFO, "File %s line %d (drive %s): ignoring previous Address Directive -m %s\n",
4424                  configfile, lineno, name, cfg.emailaddress.c_str());
4425       cfg.emailaddress = arg;
4426     }
4427     break;
4428   case 'M':
4429     // email warning options
4430     if (!(arg = strtok(NULL, delim)))
4431       missingarg = 1;
4432     else if (!strcmp(arg, "once"))
4433       cfg.emailfreq = 1;
4434     else if (!strcmp(arg, "daily"))
4435       cfg.emailfreq = 2;
4436     else if (!strcmp(arg, "diminishing"))
4437       cfg.emailfreq = 3;
4438     else if (!strcmp(arg, "test"))
4439       cfg.emailtest = 1;
4440     else if (!strcmp(arg, "exec")) {
4441       // Get the next argument (the command line)
4442 #ifdef _WIN32
4443       // Allow "/path name/with spaces/..." on Windows
4444       arg = strtok_dequote(delim);
4445       if (arg && arg[0] == '"') {
4446         PrintOut(LOG_CRIT, "File %s line %d (drive %s): Directive %s 'exec' argument: missing closing quote\n",
4447                  configfile, lineno, name, token);
4448         return -1;
4449       }
4450 #else
4451       arg = strtok(0, delim);
4452 #endif
4453       if (!arg) {
4454         PrintOut(LOG_CRIT, "File %s line %d (drive %s): Directive %s 'exec' argument must be followed by executable path.\n",
4455                  configfile, lineno, name, token);
4456         return -1;
4457       }
4458       // Free the last cmd line given if any, and copy new one
4459       if (!cfg.emailcmdline.empty())
4460         PrintOut(LOG_INFO, "File %s line %d (drive %s): ignoring previous mail Directive -M exec %s\n",
4461                  configfile, lineno, name, cfg.emailcmdline.c_str());
4462       cfg.emailcmdline = arg;
4463     }
4464     else
4465       badarg = 1;
4466     break;
4467   case 'i':
4468     // ignore failure of usage attribute
4469     if ((val=GetInteger(arg=strtok(NULL,delim), name, token, lineno, configfile, 1, 255))<0)
4470       return -1;
4471     cfg.monitor_attr_flags.set(val, MONITOR_IGN_FAILUSE);
4472     break;
4473   case 'I':
4474     // ignore attribute for tracking purposes
4475     if ((val=GetInteger(arg=strtok(NULL,delim), name, token, lineno, configfile, 1, 255))<0)
4476       return -1;
4477     cfg.monitor_attr_flags.set(val, MONITOR_IGNORE);
4478     break;
4479   case 'r':
4480     // print raw value when tracking
4481     if ((val = GetInteger(arg=strtok(NULL,delim), name, token, lineno, configfile, 1, 255, excl)) < 0)
4482       return -1;
4483     cfg.monitor_attr_flags.set(val, MONITOR_RAW_PRINT);
4484     if (*excl == '!') // attribute change is critical
4485       cfg.monitor_attr_flags.set(val, MONITOR_AS_CRIT);
4486     break;
4487   case 'R':
4488     // track changes in raw value (forces printing of raw value)
4489     if ((val = GetInteger(arg=strtok(NULL,delim), name, token, lineno, configfile, 1, 255, excl)) < 0)
4490       return -1;
4491     cfg.monitor_attr_flags.set(val, MONITOR_RAW_PRINT|MONITOR_RAW);
4492     if (*excl == '!') // raw value change is critical
4493       cfg.monitor_attr_flags.set(val, MONITOR_RAW_AS_CRIT);
4494     break;
4495   case 'W':
4496     // track Temperature
4497     if (Get3Integers(arg=strtok(NULL, delim), name, token, lineno, configfile,
4498                      &cfg.tempdiff, &cfg.tempinfo, &cfg.tempcrit) < 0)
4499       return -1;
4500     break;
4501   case 'v':
4502     // non-default vendor-specific attribute meaning
4503     if (!(arg=strtok(NULL,delim))) {
4504       missingarg = 1;
4505     } else if (!parse_attribute_def(arg, cfg.attribute_defs, PRIOR_USER)) {
4506       badarg = 1;
4507     }
4508     break;
4509   case 'P':
4510     // Define use of drive-specific presets.
4511     if (!(arg = strtok(NULL, delim))) {
4512       missingarg = 1;
4513     } else if (!strcmp(arg, "use")) {
4514       cfg.ignorepresets = false;
4515     } else if (!strcmp(arg, "ignore")) {
4516       cfg.ignorepresets = true;
4517     } else if (!strcmp(arg, "show")) {
4518       cfg.showpresets = true;
4519     } else if (!strcmp(arg, "showall")) {
4520       showallpresets();
4521     } else {
4522       badarg = 1;
4523     }
4524     break;
4525 
4526   case 'e':
4527     // Various ATA settings
4528     if (!(arg = strtok(NULL, delim))) {
4529       missingarg = true;
4530     }
4531     else {
4532       char arg2[16+1]; unsigned val;
4533       int n1 = -1, n2 = -1, n3 = -1, len = strlen(arg);
4534       if (sscanf(arg, "%16[^,=]%n%*[,=]%n%u%n", arg2, &n1, &n2, &val, &n3) >= 1
4535           && (n1 == len || n2 > 0)) {
4536         bool on  = (n2 > 0 && !strcmp(arg+n2, "on"));
4537         bool off = (n2 > 0 && !strcmp(arg+n2, "off"));
4538         if (n3 != len)
4539           val = ~0U;
4540 
4541         if (!strcmp(arg2, "aam")) {
4542           if (off)
4543             cfg.set_aam = -1;
4544           else if (val <= 254)
4545             cfg.set_aam = val + 1;
4546           else
4547             badarg = true;
4548         }
4549         else if (!strcmp(arg2, "apm")) {
4550           if (off)
4551             cfg.set_apm = -1;
4552           else if (1 <= val && val <= 254)
4553             cfg.set_apm = val + 1;
4554           else
4555             badarg = true;
4556         }
4557         else if (!strcmp(arg2, "lookahead")) {
4558           if (off)
4559             cfg.set_lookahead = -1;
4560           else if (on)
4561             cfg.set_lookahead = 1;
4562           else
4563             badarg = true;
4564         }
4565         else if (!strcmp(arg, "security-freeze")) {
4566           cfg.set_security_freeze = true;
4567         }
4568         else if (!strcmp(arg2, "standby")) {
4569           if (off)
4570             cfg.set_standby = 0 + 1;
4571           else if (val <= 255)
4572             cfg.set_standby = val + 1;
4573           else
4574             badarg = true;
4575         }
4576         else if (!strcmp(arg2, "wcache")) {
4577           if (off)
4578             cfg.set_wcache = -1;
4579           else if (on)
4580             cfg.set_wcache = 1;
4581           else
4582             badarg = true;
4583         }
4584         else if (!strcmp(arg2, "dsn")) {
4585           if (off)
4586             cfg.set_dsn = -1;
4587           else if (on)
4588             cfg.set_dsn = 1;
4589           else
4590             badarg = true;
4591         }
4592         else
4593           badarg = true;
4594       }
4595       else
4596         badarg = true;
4597     }
4598     break;
4599 
4600   default:
4601     // Directive not recognized
4602     PrintOut(LOG_CRIT,"File %s line %d (drive %s): unknown Directive: %s\n",
4603              configfile, lineno, name, token);
4604     Directives();
4605     return -1;
4606   }
4607   if (missingarg) {
4608     PrintOut(LOG_CRIT, "File %s line %d (drive %s): Missing argument to %s Directive\n",
4609              configfile, lineno, name, token);
4610   }
4611   if (badarg) {
4612     PrintOut(LOG_CRIT, "File %s line %d (drive %s): Invalid argument to %s Directive: %s\n",
4613              configfile, lineno, name, token, arg);
4614   }
4615   if (missingarg || badarg) {
4616     PrintOut(LOG_CRIT, "Valid arguments to %s Directive are: ", token);
4617     printoutvaliddirectiveargs(LOG_CRIT, sym);
4618     PrintOut(LOG_CRIT, "\n");
4619     return -1;
4620   }
4621 
4622   return 1;
4623 }
4624 
4625 // Scan directive for configuration file
4626 #define SCANDIRECTIVE "DEVICESCAN"
4627 
4628 // This is the routine that adds things to the conf_entries list.
4629 //
4630 // Return values are:
4631 //  1: parsed a normal line
4632 //  0: found DEFAULT setting or comment or blank line
4633 // -1: found SCANDIRECTIVE line
4634 // -2: found an error
4635 //
4636 // Note: this routine modifies *line from the caller!
ParseConfigLine(dev_config_vector & conf_entries,dev_config & default_conf,smart_devtype_list & scan_types,int lineno,char * line)4637 static int ParseConfigLine(dev_config_vector & conf_entries, dev_config & default_conf,
4638   smart_devtype_list & scan_types, int lineno, /*const*/ char * line)
4639 {
4640   const char *delim = " \n\t";
4641 
4642   // get first token: device name. If a comment, skip line
4643   const char * name = strtok(line, delim);
4644   if (!name || *name == '#')
4645     return 0;
4646 
4647   // Check device name for DEFAULT or DEVICESCAN
4648   int retval;
4649   if (!strcmp("DEFAULT", name)) {
4650     retval = 0;
4651     // Restart with empty defaults
4652     default_conf = dev_config();
4653   }
4654   else {
4655     retval = (!strcmp(SCANDIRECTIVE, name) ? -1 : 1);
4656     // Init new entry with current defaults
4657     conf_entries.push_back(default_conf);
4658   }
4659   dev_config & cfg = (retval ? conf_entries.back() : default_conf);
4660 
4661   cfg.name = name; // Later replaced by dev->get_info().info_name
4662   cfg.dev_name = name; // If DEVICESCAN later replaced by get->dev_info().dev_name
4663   cfg.lineno = lineno;
4664 
4665   // parse tokens one at a time from the file.
4666   while (char * token = strtok(0, delim)) {
4667     int rc = ParseToken(token, cfg, scan_types);
4668     if (rc < 0)
4669       // error found on the line
4670       return -2;
4671 
4672     if (rc == 0)
4673       // No tokens left
4674       break;
4675 
4676     // PrintOut(LOG_INFO,"Parsed token %s\n",token);
4677   }
4678 
4679   // Check for multiple -d TYPE directives
4680   if (retval != -1 && scan_types.size() > 1) {
4681     PrintOut(LOG_CRIT, "Drive: %s, invalid multiple -d TYPE Directives on line %d of file %s\n",
4682              cfg.name.c_str(), cfg.lineno, configfile);
4683     return -2;
4684   }
4685 
4686   // Don't perform checks below for DEFAULT entries
4687   if (retval == 0)
4688     return retval;
4689 
4690   // If NO monitoring directives are set, then set all of them.
4691   if (!(   cfg.smartcheck  || cfg.selftest
4692         || cfg.errorlog    || cfg.xerrorlog
4693         || cfg.offlinests  || cfg.selfteststs
4694         || cfg.usagefailed || cfg.prefail  || cfg.usage
4695         || cfg.tempdiff    || cfg.tempinfo || cfg.tempcrit)) {
4696 
4697     PrintOut(LOG_INFO,"Drive: %s, implied '-a' Directive on line %d of file %s\n",
4698              cfg.name.c_str(), cfg.lineno, configfile);
4699 
4700     cfg.smartcheck = true;
4701     cfg.usagefailed = true;
4702     cfg.prefail = true;
4703     cfg.usage = true;
4704     cfg.selftest = true;
4705     cfg.errorlog = true;
4706     cfg.selfteststs = true;
4707   }
4708 
4709   // additional sanity check. Has user set -M options without -m?
4710   if (cfg.emailaddress.empty() && (!cfg.emailcmdline.empty() || cfg.emailfreq || cfg.emailtest)){
4711     PrintOut(LOG_CRIT,"Drive: %s, -M Directive(s) on line %d of file %s need -m ADDRESS Directive\n",
4712              cfg.name.c_str(), cfg.lineno, configfile);
4713     return -2;
4714   }
4715 
4716   // has the user has set <nomailer>?
4717   if (cfg.emailaddress == "<nomailer>") {
4718     // check that -M exec is also set
4719     if (cfg.emailcmdline.empty()){
4720       PrintOut(LOG_CRIT,"Drive: %s, -m <nomailer> Directive on line %d of file %s needs -M exec Directive\n",
4721                cfg.name.c_str(), cfg.lineno, configfile);
4722       return -2;
4723     }
4724     // From here on the sign of <nomailer> is cfg.emailaddress.empty() and !cfg.emailcmdline.empty()
4725     cfg.emailaddress.clear();
4726   }
4727 
4728   return retval;
4729 }
4730 
4731 // Parses a configuration file.  Return values are:
4732 //  N=>0: found N entries
4733 // -1:    syntax error in config file
4734 // -2:    config file does not exist
4735 // -3:    config file exists but cannot be read
4736 //
4737 // In the case where the return value is 0, there are three
4738 // possibilities:
4739 // Empty configuration file ==> conf_entries.empty()
4740 // No configuration file    ==> conf_entries[0].lineno == 0
4741 // SCANDIRECTIVE found      ==> conf_entries.back().lineno != 0 (size >= 1)
ParseConfigFile(dev_config_vector & conf_entries,smart_devtype_list & scan_types)4742 static int ParseConfigFile(dev_config_vector & conf_entries, smart_devtype_list & scan_types)
4743 {
4744   // maximum line length in configuration file
4745   const int MAXLINELEN = 256;
4746   // maximum length of a continued line in configuration file
4747   const int MAXCONTLINE = 1023;
4748 
4749   stdio_file f;
4750   // Open config file, if it exists and is not <stdin>
4751   if (!(configfile == configfile_stdin)) { // pointer comparison ok here
4752     if (!f.open(configfile,"r") && (errno!=ENOENT || !configfile_alt.empty())) {
4753       // file exists but we can't read it or it should exist due to '-c' option
4754       int ret = (errno!=ENOENT ? -3 : -2);
4755       PrintOut(LOG_CRIT,"%s: Unable to open configuration file %s\n",
4756                strerror(errno),configfile);
4757       return ret;
4758     }
4759   }
4760   else // read from stdin ('-c -' option)
4761     f.open(stdin);
4762 
4763   // Start with empty defaults
4764   dev_config default_conf;
4765 
4766   // No configuration file found -- use fake one
4767   int entry = 0;
4768   if (!f) {
4769     char fakeconfig[] = SCANDIRECTIVE " -a"; // TODO: Remove this hack, build cfg_entry.
4770 
4771     if (ParseConfigLine(conf_entries, default_conf, scan_types, 0, fakeconfig) != -1)
4772       throw std::logic_error("Internal error parsing " SCANDIRECTIVE);
4773     return 0;
4774   }
4775 
4776 #ifdef __CYGWIN__
4777   setmode(fileno(f), O_TEXT); // Allow files with \r\n
4778 #endif
4779 
4780   // configuration file exists
4781   PrintOut(LOG_INFO,"Opened configuration file %s\n",configfile);
4782 
4783   // parse config file line by line
4784   int lineno = 1, cont = 0, contlineno = 0;
4785   char line[MAXLINELEN+2];
4786   char fullline[MAXCONTLINE+1];
4787 
4788   for (;;) {
4789     int len=0,scandevice;
4790     char *lastslash;
4791     char *comment;
4792     char *code;
4793 
4794     // make debugging simpler
4795     memset(line,0,sizeof(line));
4796 
4797     // get a line
4798     code=fgets(line, MAXLINELEN+2, f);
4799 
4800     // are we at the end of the file?
4801     if (!code){
4802       if (cont) {
4803         scandevice = ParseConfigLine(conf_entries, default_conf, scan_types, contlineno, fullline);
4804         // See if we found a SCANDIRECTIVE directive
4805         if (scandevice==-1)
4806           return 0;
4807         // did we find a syntax error
4808         if (scandevice==-2)
4809           return -1;
4810         // the final line is part of a continuation line
4811         entry+=scandevice;
4812       }
4813       break;
4814     }
4815 
4816     // input file line number
4817     contlineno++;
4818 
4819     // See if line is too long
4820     len=strlen(line);
4821     if (len>MAXLINELEN){
4822       const char *warn;
4823       if (line[len-1]=='\n')
4824         warn="(including newline!) ";
4825       else
4826         warn="";
4827       PrintOut(LOG_CRIT,"Error: line %d of file %s %sis more than MAXLINELEN=%d characters.\n",
4828                (int)contlineno,configfile,warn,(int)MAXLINELEN);
4829       return -1;
4830     }
4831 
4832     // Ignore anything after comment symbol
4833     if ((comment=strchr(line,'#'))){
4834       *comment='\0';
4835       len=strlen(line);
4836     }
4837 
4838     // is the total line (made of all continuation lines) too long?
4839     if (cont+len>MAXCONTLINE){
4840       PrintOut(LOG_CRIT,"Error: continued line %d (actual line %d) of file %s is more than MAXCONTLINE=%d characters.\n",
4841                lineno, (int)contlineno, configfile, (int)MAXCONTLINE);
4842       return -1;
4843     }
4844 
4845     // copy string so far into fullline, and increment length
4846     snprintf(fullline+cont, sizeof(fullline)-cont, "%s" ,line);
4847     cont+=len;
4848 
4849     // is this a continuation line.  If so, replace \ by space and look at next line
4850     if ( (lastslash=strrchr(line,'\\')) && !strtok(lastslash+1," \n\t")){
4851       *(fullline+(cont-len)+(lastslash-line))=' ';
4852       continue;
4853     }
4854 
4855     // Not a continuation line. Parse it
4856     scan_types.clear();
4857     scandevice = ParseConfigLine(conf_entries, default_conf, scan_types, contlineno, fullline);
4858 
4859     // did we find a scandevice directive?
4860     if (scandevice==-1)
4861       return 0;
4862     // did we find a syntax error
4863     if (scandevice==-2)
4864       return -1;
4865 
4866     entry+=scandevice;
4867     lineno++;
4868     cont=0;
4869   }
4870 
4871   // note -- may be zero if syntax of file OK, but no valid entries!
4872   return entry;
4873 }
4874 
4875 /* Prints the message "=======> VALID ARGUMENTS ARE: <LIST>  <=======\n", where
4876    <LIST> is the list of valid arguments for option opt. */
PrintValidArgs(char opt)4877 static void PrintValidArgs(char opt)
4878 {
4879   const char *s;
4880 
4881   PrintOut(LOG_CRIT, "=======> VALID ARGUMENTS ARE: ");
4882   if (!(s = GetValidArgList(opt)))
4883     PrintOut(LOG_CRIT, "Error constructing argument list for option %c", opt);
4884   else
4885     PrintOut(LOG_CRIT, "%s", (char *)s);
4886   PrintOut(LOG_CRIT, " <=======\n");
4887 }
4888 
4889 #ifndef _WIN32
4890 // Report error and return false if specified path is not absolute.
check_abs_path(char option,const std::string & path)4891 static bool check_abs_path(char option, const std::string & path)
4892 {
4893   if (path.empty() || path[0] == '/')
4894     return true;
4895 
4896   debugmode = 1;
4897   PrintHead();
4898   PrintOut(LOG_CRIT, "=======> INVALID ARGUMENT TO -%c: %s <=======\n\n", option, path.c_str());
4899   PrintOut(LOG_CRIT, "Error: relative path names are not allowed\n\n");
4900   return false;
4901 }
4902 #endif // !_WIN32
4903 
4904 // Parses input line, prints usage message and
4905 // version/license/copyright messages
parse_options(int argc,char ** argv)4906 static int parse_options(int argc, char **argv)
4907 {
4908   // Init default path names
4909 #ifndef _WIN32
4910   configfile = SMARTMONTOOLS_SYSCONFDIR "/smartd.conf";
4911   warning_script = SMARTMONTOOLS_SMARTDSCRIPTDIR "/smartd_warning.sh";
4912 #else
4913   std::string exedir = get_exe_dir();
4914   static std::string configfile_str = exedir + "/smartd.conf";
4915   configfile = configfile_str.c_str();
4916   warning_script = exedir + "/smartd_warning.cmd";
4917 #endif
4918 
4919   // Please update GetValidArgList() if you edit shortopts
4920   static const char shortopts[] = "c:l:q:dDni:p:r:s:A:B:w:Vh?"
4921 #ifdef HAVE_LIBCAP_NG
4922                                                           "C"
4923 #endif
4924                                                              ;
4925   // Please update GetValidArgList() if you edit longopts
4926   struct option longopts[] = {
4927     { "configfile",     required_argument, 0, 'c' },
4928     { "logfacility",    required_argument, 0, 'l' },
4929     { "quit",           required_argument, 0, 'q' },
4930     { "debug",          no_argument,       0, 'd' },
4931     { "showdirectives", no_argument,       0, 'D' },
4932     { "interval",       required_argument, 0, 'i' },
4933 #ifndef _WIN32
4934     { "no-fork",        no_argument,       0, 'n' },
4935 #else
4936     { "service",        no_argument,       0, 'n' },
4937 #endif
4938     { "pidfile",        required_argument, 0, 'p' },
4939     { "report",         required_argument, 0, 'r' },
4940     { "savestates",     required_argument, 0, 's' },
4941     { "attributelog",   required_argument, 0, 'A' },
4942     { "drivedb",        required_argument, 0, 'B' },
4943     { "warnexec",       required_argument, 0, 'w' },
4944     { "version",        no_argument,       0, 'V' },
4945     { "license",        no_argument,       0, 'V' },
4946     { "copyright",      no_argument,       0, 'V' },
4947     { "help",           no_argument,       0, 'h' },
4948     { "usage",          no_argument,       0, 'h' },
4949 #ifdef HAVE_LIBCAP_NG
4950     { "capabilities",   no_argument,       0, 'C' },
4951 #endif
4952     { 0,                0,                 0, 0   }
4953   };
4954 
4955   opterr=optopt=0;
4956   bool badarg = false;
4957   bool use_default_db = true; // set false on '-B FILE'
4958 
4959   // Parse input options.
4960   int optchar;
4961   while ((optchar = getopt_long(argc, argv, shortopts, longopts, NULL)) != -1) {
4962     char *arg;
4963     char *tailptr;
4964     long lchecktime;
4965 
4966     switch(optchar) {
4967     case 'q':
4968       // when to quit
4969       if (!strcmp(optarg, "nodev"))
4970         quit = QUIT_NODEV;
4971       else if (!strcmp(optarg, "nodevstartup"))
4972         quit = QUIT_NODEVSTARTUP;
4973       else if (!strcmp(optarg, "never"))
4974         quit = QUIT_NEVER;
4975       else if (!strcmp(optarg, "onecheck")) {
4976         quit = QUIT_ONECHECK;
4977         debugmode = 1;
4978       }
4979       else if (!strcmp(optarg, "showtests")) {
4980         quit = QUIT_SHOWTESTS;
4981         debugmode = 1;
4982       }
4983       else if (!strcmp(optarg, "errors"))
4984         quit = QUIT_ERRORS;
4985       else
4986         badarg = true;
4987       break;
4988     case 'l':
4989       // set the log facility level
4990       if (!strcmp(optarg, "daemon"))
4991         facility=LOG_DAEMON;
4992       else if (!strcmp(optarg, "local0"))
4993         facility=LOG_LOCAL0;
4994       else if (!strcmp(optarg, "local1"))
4995         facility=LOG_LOCAL1;
4996       else if (!strcmp(optarg, "local2"))
4997         facility=LOG_LOCAL2;
4998       else if (!strcmp(optarg, "local3"))
4999         facility=LOG_LOCAL3;
5000       else if (!strcmp(optarg, "local4"))
5001         facility=LOG_LOCAL4;
5002       else if (!strcmp(optarg, "local5"))
5003         facility=LOG_LOCAL5;
5004       else if (!strcmp(optarg, "local6"))
5005         facility=LOG_LOCAL6;
5006       else if (!strcmp(optarg, "local7"))
5007         facility=LOG_LOCAL7;
5008       else
5009         badarg = true;
5010       break;
5011     case 'd':
5012       // enable debug mode
5013       debugmode = 1;
5014       break;
5015     case 'n':
5016       // don't fork()
5017 #ifndef _WIN32 // On Windows, --service is already handled by daemon_main()
5018       do_fork = false;
5019 #endif
5020       break;
5021     case 'D':
5022       // print summary of all valid directives
5023       debugmode = 1;
5024       Directives();
5025       return 0;
5026     case 'i':
5027       // Period (time interval) for checking
5028       // strtol will set errno in the event of overflow, so we'll check it.
5029       errno = 0;
5030       lchecktime = strtol(optarg, &tailptr, 10);
5031       if (*tailptr != '\0' || lchecktime < 10 || lchecktime > INT_MAX || errno) {
5032         debugmode=1;
5033         PrintHead();
5034         PrintOut(LOG_CRIT, "======> INVALID INTERVAL: %s <=======\n", optarg);
5035         PrintOut(LOG_CRIT, "======> INTERVAL MUST BE INTEGER BETWEEN %d AND %d <=======\n", 10, INT_MAX);
5036         PrintOut(LOG_CRIT, "\nUse smartd -h to get a usage summary\n\n");
5037         return EXIT_BADCMD;
5038       }
5039       checktime = (int)lchecktime;
5040       break;
5041     case 'r':
5042       // report IOCTL transactions
5043       {
5044         int n1 = -1, n2 = -1, len = strlen(optarg);
5045         char s[9+1]; unsigned i = 1;
5046         sscanf(optarg, "%9[a-z]%n,%u%n", s, &n1, &i, &n2);
5047         if (!((n1 == len || n2 == len) && 1 <= i && i <= 4)) {
5048           badarg = true;
5049         } else if (!strcmp(s,"ioctl")) {
5050           ata_debugmode = scsi_debugmode = nvme_debugmode = i;
5051         } else if (!strcmp(s,"ataioctl")) {
5052           ata_debugmode = i;
5053         } else if (!strcmp(s,"scsiioctl")) {
5054           scsi_debugmode = i;
5055         } else if (!strcmp(s,"nvmeioctl")) {
5056           nvme_debugmode = i;
5057         } else {
5058           badarg = true;
5059         }
5060       }
5061       break;
5062     case 'c':
5063       // alternate configuration file
5064       if (strcmp(optarg,"-"))
5065         configfile = (configfile_alt = optarg).c_str();
5066       else // read from stdin
5067         configfile=configfile_stdin;
5068       break;
5069     case 'p':
5070       // output file with PID number
5071       pid_file = optarg;
5072       break;
5073     case 's':
5074       // path prefix of persistent state file
5075       state_path_prefix = optarg;
5076       break;
5077     case 'A':
5078       // path prefix of attribute log file
5079       attrlog_path_prefix = optarg;
5080       break;
5081     case 'B':
5082       {
5083         const char * path = optarg;
5084         if (*path == '+' && path[1])
5085           path++;
5086         else
5087           use_default_db = false;
5088         unsigned char savedebug = debugmode; debugmode = 1;
5089         if (!read_drive_database(path))
5090           return EXIT_BADCMD;
5091         debugmode = savedebug;
5092       }
5093       break;
5094     case 'w':
5095       warning_script = optarg;
5096       break;
5097     case 'V':
5098       // print version and CVS info
5099       debugmode = 1;
5100       PrintOut(LOG_INFO, "%s", format_version_info("smartd", true /*full*/).c_str());
5101       return 0;
5102 #ifdef HAVE_LIBCAP_NG
5103     case 'C':
5104       // enable capabilities
5105       capabilities_enabled = true;
5106       break;
5107 #endif
5108     case 'h':
5109       // help: print summary of command-line options
5110       debugmode=1;
5111       PrintHead();
5112       Usage();
5113       return 0;
5114     case '?':
5115     default:
5116       // unrecognized option
5117       debugmode=1;
5118       PrintHead();
5119       // Point arg to the argument in which this option was found.
5120       arg = argv[optind-1];
5121       // Check whether the option is a long option that doesn't map to -h.
5122       if (arg[1] == '-' && optchar != 'h') {
5123         // Iff optopt holds a valid option then argument must be missing.
5124         if (optopt && (strchr(shortopts, optopt) != NULL)) {
5125           PrintOut(LOG_CRIT, "=======> ARGUMENT REQUIRED FOR OPTION: %s <=======\n",arg+2);
5126           PrintValidArgs(optopt);
5127         } else {
5128           PrintOut(LOG_CRIT, "=======> UNRECOGNIZED OPTION: %s <=======\n\n",arg+2);
5129         }
5130         PrintOut(LOG_CRIT, "\nUse smartd --help to get a usage summary\n\n");
5131         return EXIT_BADCMD;
5132       }
5133       if (optopt) {
5134         // Iff optopt holds a valid option then argument must be missing.
5135         if (strchr(shortopts, optopt) != NULL){
5136           PrintOut(LOG_CRIT, "=======> ARGUMENT REQUIRED FOR OPTION: %c <=======\n",optopt);
5137           PrintValidArgs(optopt);
5138         } else {
5139           PrintOut(LOG_CRIT, "=======> UNRECOGNIZED OPTION: %c <=======\n\n",optopt);
5140         }
5141         PrintOut(LOG_CRIT, "\nUse smartd -h to get a usage summary\n\n");
5142         return EXIT_BADCMD;
5143       }
5144       Usage();
5145       return 0;
5146     }
5147 
5148     // Check to see if option had an unrecognized or incorrect argument.
5149     if (badarg) {
5150       debugmode=1;
5151       PrintHead();
5152       // It would be nice to print the actual option name given by the user
5153       // here, but we just print the short form.  Please fix this if you know
5154       // a clean way to do it.
5155       PrintOut(LOG_CRIT, "=======> INVALID ARGUMENT TO -%c: %s <======= \n", optchar, optarg);
5156       PrintValidArgs(optchar);
5157       PrintOut(LOG_CRIT, "\nUse smartd -h to get a usage summary\n\n");
5158       return EXIT_BADCMD;
5159     }
5160   }
5161 
5162   // non-option arguments are not allowed
5163   if (argc > optind) {
5164     debugmode=1;
5165     PrintHead();
5166     PrintOut(LOG_CRIT, "=======> UNRECOGNIZED ARGUMENT: %s <=======\n\n", argv[optind]);
5167     PrintOut(LOG_CRIT, "\nUse smartd -h to get a usage summary\n\n");
5168     return EXIT_BADCMD;
5169   }
5170 
5171   // no pidfile in debug mode
5172   if (debugmode && !pid_file.empty()) {
5173     debugmode=1;
5174     PrintHead();
5175     PrintOut(LOG_CRIT, "=======> INVALID CHOICE OF OPTIONS: -d and -p <======= \n\n");
5176     PrintOut(LOG_CRIT, "Error: pid file %s not written in debug (-d) mode\n\n", pid_file.c_str());
5177     return EXIT_BADCMD;
5178   }
5179 
5180 #ifndef _WIN32
5181   if (!debugmode) {
5182     // absolute path names are required due to chdir('/') in daemon_init()
5183     if (!(   check_abs_path('p', pid_file)
5184           && check_abs_path('s', state_path_prefix)
5185           && check_abs_path('A', attrlog_path_prefix)))
5186       return EXIT_BADCMD;
5187   }
5188 #endif
5189 
5190   // Read or init drive database
5191   {
5192     unsigned char savedebug = debugmode; debugmode = 1;
5193     if (!init_drive_database(use_default_db))
5194       return EXIT_BADCMD;
5195     debugmode = savedebug;
5196   }
5197 
5198   // Check option compatibility of notify support
5199     // cppcheck-suppress knownConditionTrueFalse
5200   if (!notify_post_init())
5201     return EXIT_BADCMD;
5202 
5203   // print header
5204   PrintHead();
5205 
5206   // No error, continue in main_worker()
5207   return -1;
5208 }
5209 
5210 // Function we call if no configuration file was found or if the
5211 // SCANDIRECTIVE Directive was found.  It makes entries for device
5212 // names returned by scan_smart_devices() in os_OSNAME.cpp
MakeConfigEntries(const dev_config & base_cfg,dev_config_vector & conf_entries,smart_device_list & scanned_devs,const smart_devtype_list & types)5213 static int MakeConfigEntries(const dev_config & base_cfg,
5214   dev_config_vector & conf_entries, smart_device_list & scanned_devs,
5215   const smart_devtype_list & types)
5216 {
5217   // make list of devices
5218   smart_device_list devlist;
5219   if (!smi()->scan_smart_devices(devlist, types)) {
5220     PrintOut(LOG_CRIT, "DEVICESCAN failed: %s\n", smi()->get_errmsg());
5221     return 0;
5222   }
5223 
5224   // if no devices, return
5225   if (devlist.size() <= 0)
5226     return 0;
5227 
5228   // add empty device slots for existing config entries
5229   while (scanned_devs.size() < conf_entries.size())
5230     scanned_devs.push_back((smart_device *)0);
5231 
5232   // loop over entries to create
5233   for (unsigned i = 0; i < devlist.size(); i++) {
5234     // Move device pointer
5235     smart_device * dev = devlist.release(i);
5236     scanned_devs.push_back(dev);
5237 
5238     // Append configuration and update names
5239     conf_entries.push_back(base_cfg);
5240     dev_config & cfg = conf_entries.back();
5241     cfg.name = dev->get_info().info_name;
5242     cfg.dev_name = dev->get_info().dev_name;
5243 
5244     // Set type only if scanning is limited to specific types
5245     // This is later used to set SMARTD_DEVICETYPE environment variable
5246     if (!types.empty())
5247       cfg.dev_type = dev->get_info().dev_type;
5248     else // SMARTD_DEVICETYPE=auto
5249       cfg.dev_type.clear();
5250   }
5251 
5252   return devlist.size();
5253 }
5254 
5255 // Returns negative value (see ParseConfigFile()) if config file
5256 // had errors, else number of entries which may be zero or positive.
ReadOrMakeConfigEntries(dev_config_vector & conf_entries,smart_device_list & scanned_devs)5257 static int ReadOrMakeConfigEntries(dev_config_vector & conf_entries, smart_device_list & scanned_devs)
5258 {
5259   // parse configuration file configfile (normally /etc/smartd.conf)
5260   smart_devtype_list scan_types;
5261   int entries = ParseConfigFile(conf_entries, scan_types);
5262 
5263   if (entries < 0) {
5264     // There was an error reading the configuration file.
5265     conf_entries.clear();
5266     if (entries == -1)
5267       PrintOut(LOG_CRIT, "Configuration file %s has fatal syntax errors.\n", configfile);
5268     return entries;
5269   }
5270 
5271   // no error parsing config file.
5272   if (entries) {
5273     // we did not find a SCANDIRECTIVE and did find valid entries
5274     PrintOut(LOG_INFO, "Configuration file %s parsed.\n", configfile);
5275   }
5276   else if (!conf_entries.empty()) {
5277     // we found a SCANDIRECTIVE or there was no configuration file so
5278     // scan.  Configuration file's last entry contains all options
5279     // that were set
5280     dev_config first = conf_entries.back();
5281     conf_entries.pop_back();
5282 
5283     if (first.lineno)
5284       PrintOut(LOG_INFO,"Configuration file %s was parsed, found %s, scanning devices\n", configfile, SCANDIRECTIVE);
5285     else
5286       PrintOut(LOG_INFO,"No configuration file %s found, scanning devices\n", configfile);
5287 
5288     // make config list of devices to search for
5289     MakeConfigEntries(first, conf_entries, scanned_devs, scan_types);
5290 
5291     // warn user if scan table found no devices
5292     if (conf_entries.empty())
5293       PrintOut(LOG_CRIT,"In the system's table of devices NO devices found to scan\n");
5294   }
5295   else
5296     PrintOut(LOG_CRIT, "Configuration file %s parsed but has no entries\n", configfile);
5297 
5298   return conf_entries.size();
5299 }
5300 
5301 // Register one device, return false on error
register_device(dev_config & cfg,dev_state & state,smart_device_auto_ptr & dev,const dev_config_vector * prev_cfgs)5302 static bool register_device(dev_config & cfg, dev_state & state, smart_device_auto_ptr & dev,
5303                             const dev_config_vector * prev_cfgs)
5304 {
5305   bool scanning;
5306   if (!dev) {
5307     // Get device of appropriate type
5308     dev = smi()->get_smart_device(cfg.name.c_str(), cfg.dev_type.c_str());
5309     if (!dev) {
5310       if (cfg.dev_type.empty())
5311         PrintOut(LOG_INFO, "Device: %s, unable to autodetect device type\n", cfg.name.c_str());
5312       else
5313         PrintOut(LOG_INFO, "Device: %s, unsupported device type '%s'\n", cfg.name.c_str(), cfg.dev_type.c_str());
5314       return false;
5315     }
5316     scanning = false;
5317   }
5318   else {
5319     // Use device from device scan
5320     scanning = true;
5321   }
5322 
5323   // Save old info
5324   smart_device::device_info oldinfo = dev->get_info();
5325 
5326   // Open with autodetect support, may return 'better' device
5327   dev.replace( dev->autodetect_open() );
5328 
5329   // Report if type has changed
5330   if (oldinfo.dev_type != dev->get_dev_type())
5331     PrintOut(LOG_INFO, "Device: %s, type changed from '%s' to '%s'\n",
5332       cfg.name.c_str(), oldinfo.dev_type.c_str(), dev->get_dev_type());
5333 
5334   // Return if autodetect_open() failed
5335   if (!dev->is_open()) {
5336     if (debugmode || !scanning)
5337       PrintOut(LOG_INFO, "Device: %s, open() failed: %s\n", dev->get_info_name(), dev->get_errmsg());
5338     return false;
5339   }
5340 
5341   // Update informal name
5342   cfg.name = dev->get_info().info_name;
5343   PrintOut(LOG_INFO, "Device: %s, opened\n", cfg.name.c_str());
5344 
5345   int status;
5346   const char * typemsg;
5347   // register ATA device
5348   if (dev->is_ata()){
5349     typemsg = "ATA";
5350     status = ATADeviceScan(cfg, state, dev->to_ata(), prev_cfgs);
5351   }
5352   // or register SCSI device
5353   else if (dev->is_scsi()){
5354     typemsg = "SCSI";
5355     status = SCSIDeviceScan(cfg, state, dev->to_scsi(), prev_cfgs);
5356   }
5357   // or register NVMe device
5358   else if (dev->is_nvme()) {
5359     typemsg = "NVMe";
5360     status = NVMeDeviceScan(cfg, state, dev->to_nvme(), prev_cfgs);
5361   }
5362   else {
5363     PrintOut(LOG_INFO, "Device: %s, neither ATA, SCSI nor NVMe device\n", cfg.name.c_str());
5364     return false;
5365   }
5366 
5367   if (status) {
5368     if (!scanning || debugmode) {
5369       if (cfg.lineno)
5370         PrintOut(scanning ? LOG_INFO : LOG_CRIT,
5371           "Unable to register %s device %s at line %d of file %s\n",
5372           typemsg, cfg.name.c_str(), cfg.lineno, configfile);
5373       else
5374         PrintOut(LOG_INFO, "Unable to register %s device %s\n",
5375           typemsg, cfg.name.c_str());
5376     }
5377 
5378     return false;
5379   }
5380 
5381   return true;
5382 }
5383 
5384 // This function tries devices from conf_entries.  Each one that can be
5385 // registered is moved onto the [ata|scsi]devices lists and removed
5386 // from the conf_entries list.
register_devices(const dev_config_vector & conf_entries,smart_device_list & scanned_devs,dev_config_vector & configs,dev_state_vector & states,smart_device_list & devices)5387 static bool register_devices(const dev_config_vector & conf_entries, smart_device_list & scanned_devs,
5388                              dev_config_vector & configs, dev_state_vector & states, smart_device_list & devices)
5389 {
5390   // start by clearing lists/memory of ALL existing devices
5391   configs.clear();
5392   devices.clear();
5393   states.clear();
5394 
5395   // Map of already seen non-DEVICESCAN devices (unique_name -> cfg.name)
5396   typedef std::map<std::string, std::string> prev_unique_names_map;
5397   prev_unique_names_map prev_unique_names;
5398 
5399   // Register entries
5400   for (unsigned i = 0; i < conf_entries.size(); i++) {
5401     dev_config cfg = conf_entries[i];
5402 
5403     // Get unique device "name [type]" (with symlinks resolved) for duplicate detection
5404     std::string unique_name = smi()->get_unique_dev_name(cfg.dev_name.c_str(), cfg.dev_type.c_str());
5405     if (debugmode && unique_name != cfg.dev_name) {
5406       pout("Device: %s%s%s%s, unique name: %s\n", cfg.name.c_str(),
5407            (!cfg.dev_type.empty() ? " [" : ""), cfg.dev_type.c_str(),
5408            (!cfg.dev_type.empty() ? "]" : ""), unique_name.c_str());
5409     }
5410 
5411     if (cfg.ignore) {
5412       // Store for duplicate detection and ignore
5413       PrintOut(LOG_INFO, "Device: %s%s%s%s, ignored\n", cfg.name.c_str(),
5414                (!cfg.dev_type.empty() ? " [" : ""), cfg.dev_type.c_str(),
5415                (!cfg.dev_type.empty() ? "]" : ""));
5416       prev_unique_names[unique_name] = cfg.name;
5417       continue;
5418     }
5419 
5420     smart_device_auto_ptr dev;
5421 
5422     // Device may already be detected during devicescan
5423     bool scanning = false;
5424     if (i < scanned_devs.size()) {
5425       dev = scanned_devs.release(i);
5426       if (dev) {
5427         // Check for a preceding non-DEVICESCAN entry for the same device
5428         prev_unique_names_map::iterator ui = prev_unique_names.find(unique_name);
5429         if (ui != prev_unique_names.end()) {
5430           bool ne = (ui->second != cfg.name);
5431           PrintOut(LOG_INFO, "Device: %s, %s%s, ignored\n", dev->get_info_name(),
5432                    (ne ? "same as " : "duplicate"), (ne ? ui->second.c_str() : ""));
5433           continue;
5434         }
5435         scanning = true;
5436       }
5437     }
5438 
5439     // Register device
5440     // If scanning, pass dev_idinfo of previous devices for duplicate check
5441     dev_state state;
5442     if (!register_device(cfg, state, dev, (scanning ? &configs : 0))) {
5443       // if device is explicitly listed and we can't register it, then
5444       // exit unless the user has specified that the device is removable
5445       if (!scanning) {
5446         if (!(cfg.removable || quit == QUIT_NEVER)) {
5447           PrintOut(LOG_CRIT, "Unable to register device %s (no Directive -d removable). Exiting.\n",
5448                    cfg.name.c_str());
5449           return false;
5450         }
5451         PrintOut(LOG_INFO, "Device: %s, not available\n", cfg.name.c_str());
5452         // Prevent retry of registration
5453         prev_unique_names[unique_name] = cfg.name;
5454       }
5455       continue;
5456     }
5457 
5458     // move onto the list of devices
5459     configs.push_back(cfg);
5460     states.push_back(state);
5461     devices.push_back(dev);
5462     if (!scanning)
5463       // Store for duplicate detection
5464       prev_unique_names[unique_name] = cfg.name;
5465   }
5466 
5467   // Set factors for staggered tests
5468   for (unsigned i = 0, factor = 0; i < configs.size(); i++) {
5469     dev_config & cfg = configs[i];
5470     if (cfg.test_regex.empty())
5471       continue;
5472     cfg.test_offset_factor = factor++;
5473   }
5474 
5475   init_disable_standby_check(configs);
5476   return true;
5477 }
5478 
5479 
5480 // Main program without exception handling
main_worker(int argc,char ** argv)5481 static int main_worker(int argc, char **argv)
5482 {
5483   // Initialize interface
5484   smart_interface::init();
5485   if (!smi())
5486     return 1;
5487 
5488   // Check whether systemd notify is supported and enabled
5489   notify_init();
5490 
5491   // parse input and print header and usage info if needed
5492   int status = parse_options(argc,argv);
5493   if (status >= 0)
5494     return status;
5495 
5496   // Configuration for each device
5497   dev_config_vector configs;
5498   // Device states
5499   dev_state_vector states;
5500   // Devices to monitor
5501   smart_device_list devices;
5502 
5503   // Drop capabilities if supported and enabled
5504   capabilities_drop_now();
5505 
5506   notify_msg("Initializing ...");
5507 
5508   // the main loop of the code
5509   bool firstpass = true, write_states_always = true;
5510   time_t wakeuptime = 0;
5511   // assert(status < 0);
5512   do {
5513     // Should we (re)read the config file?
5514     if (firstpass || caughtsigHUP){
5515       if (!firstpass) {
5516         // Write state files
5517         if (!state_path_prefix.empty())
5518           write_all_dev_states(configs, states);
5519 
5520         PrintOut(LOG_INFO,
5521                  caughtsigHUP==1?
5522                  "Signal HUP - rereading configuration file %s\n":
5523                  "\a\nSignal INT - rereading configuration file %s (" SIGQUIT_KEYNAME " quits)\n\n",
5524                  configfile);
5525         notify_msg("Reloading ...");
5526       }
5527 
5528       {
5529         dev_config_vector conf_entries; // Entries read from smartd.conf
5530         smart_device_list scanned_devs; // Devices found during scan
5531         // (re)reads config file, makes >=0 entries
5532         int entries = ReadOrMakeConfigEntries(conf_entries, scanned_devs);
5533 
5534         if (entries>=0) {
5535           // checks devices, then moves onto ata/scsi list or deallocates.
5536           if (!register_devices(conf_entries, scanned_devs, configs, states, devices)) {
5537             status = EXIT_BADDEV;
5538             break;
5539           }
5540           if (!(configs.size() == devices.size() && configs.size() == states.size()))
5541             throw std::logic_error("Invalid result from RegisterDevices");
5542           // Handle limitations if capabilities are dropped
5543           capabilities_check_config(configs);
5544         }
5545         else if (   quit == QUIT_NEVER
5546                  || ((quit == QUIT_NODEV || quit == QUIT_NODEVSTARTUP) && !firstpass)) {
5547           // user has asked to continue on error in configuration file
5548           if (!firstpass)
5549             PrintOut(LOG_INFO,"Reusing previous configuration\n");
5550         }
5551         else {
5552           // exit with configuration file error status
5553           status = (entries == -3 ? EXIT_READCONF : entries == -2 ? EXIT_NOCONF : EXIT_BADCONF);
5554           break;
5555         }
5556       }
5557 
5558       if (!(   devices.size() > 0 || quit == QUIT_NEVER
5559             || (quit == QUIT_NODEVSTARTUP && !firstpass))) {
5560         PrintOut(LOG_INFO, "Unable to monitor any SMART enabled devices. %sExiting...\n",
5561                  (!debugmode ? "Try debug (-d) option. " : ""));
5562         status = EXIT_NODEV;
5563         break;
5564       }
5565 
5566       // Log number of devices we are monitoring...
5567       int numata = 0, numscsi = 0;
5568       for (unsigned i = 0; i < devices.size(); i++) {
5569         const smart_device * dev = devices.at(i);
5570         if (dev->is_ata())
5571           numata++;
5572         else if (dev->is_scsi())
5573           numscsi++;
5574       }
5575       PrintOut(LOG_INFO, "Monitoring %d ATA/SATA, %d SCSI/SAS and %d NVMe devices\n",
5576                numata, numscsi, (int)devices.size() - numata - numscsi);
5577 
5578       if (quit == QUIT_SHOWTESTS) {
5579         // user has asked to print test schedule
5580         PrintTestSchedule(configs, states, devices);
5581         // assert(firstpass);
5582         return 0;
5583       }
5584 
5585       // reset signal
5586       caughtsigHUP=0;
5587 
5588       // Always write state files after (re)configuration
5589       write_states_always = true;
5590     }
5591 
5592     // check all devices once,
5593     // self tests are not started in first pass unless '-q onecheck' is specified
5594     notify_check((int)devices.size());
5595     CheckDevicesOnce(configs, states, devices, firstpass, (!firstpass || quit == QUIT_ONECHECK));
5596 
5597      // Write state files
5598     if (!state_path_prefix.empty())
5599       write_all_dev_states(configs, states, write_states_always);
5600     write_states_always = false;
5601 
5602     // Write attribute logs
5603     if (!attrlog_path_prefix.empty())
5604       write_all_dev_attrlogs(configs, states);
5605 
5606     // user has asked us to exit after first check
5607     if (quit == QUIT_ONECHECK) {
5608       PrintOut(LOG_INFO,"Started with '-q onecheck' option. All devices successfully checked once.\n"
5609                "smartd is exiting (exit status 0)\n");
5610       // assert(firstpass);
5611       return 0;
5612     }
5613 
5614     if (firstpass) {
5615       if (!debugmode) {
5616         // fork() into background if needed, close ALL file descriptors,
5617         // redirect stdin, stdout, and stderr, chdir to "/".
5618         status = daemon_init();
5619         if (status >= 0)
5620           return status;
5621 
5622         // Write PID file if configured
5623         if (!write_pid_file())
5624           return EXIT_PID;
5625       }
5626 
5627       // Set exit and signal handlers
5628       install_signal_handlers();
5629 
5630       // Initialize wakeup time to CURRENT time
5631       wakeuptime = time(0);
5632 
5633       firstpass = false;
5634     }
5635 
5636     // sleep until next check time, or a signal arrives
5637     wakeuptime = dosleep(wakeuptime, write_states_always, (int)devices.size());
5638 
5639   } while (!caughtsigEXIT);
5640 
5641   if (caughtsigEXIT && status < 0) {
5642     // Loop exited on signal
5643     if (caughtsigEXIT == SIGTERM || (debugmode && caughtsigEXIT == SIGQUIT)) {
5644       PrintOut(LOG_INFO, "smartd received signal %d: %s\n",
5645                caughtsigEXIT, strsignal(caughtsigEXIT));
5646     }
5647     else {
5648       // Unexpected SIGINT or SIGQUIT
5649       PrintOut(LOG_CRIT, "smartd received unexpected signal %d: %s\n",
5650                caughtsigEXIT, strsignal(caughtsigEXIT));
5651       status = EXIT_SIGNAL;
5652     }
5653   }
5654 
5655   // Status unset above implies success
5656   if (status < 0)
5657     status = 0;
5658 
5659   if (!firstpass) {
5660     // Loop exited after daemon_init() and write_pid_file()
5661 
5662     // Write state files only on normal exit
5663     if (!status && !state_path_prefix.empty())
5664       write_all_dev_states(configs, states);
5665 
5666     // Delete PID file, if one was created
5667     if (!pid_file.empty() && unlink(pid_file.c_str()))
5668         PrintOut(LOG_CRIT,"Can't unlink PID file %s (%s).\n",
5669                  pid_file.c_str(), strerror(errno));
5670 
5671     // and this should be the final output from smartd before it exits
5672     PrintOut((status ? LOG_CRIT : LOG_INFO), "smartd is exiting (exit status %d)\n",
5673              status);
5674   }
5675 
5676   return status;
5677 }
5678 
5679 
5680 #ifndef _WIN32
5681 // Main program
main(int argc,char ** argv)5682 int main(int argc, char **argv)
5683 #else
5684 // Windows: internal main function started direct or by service control manager
5685 static int smartd_main(int argc, char **argv)
5686 #endif
5687 {
5688   int status;
5689   try {
5690     // Do the real work ...
5691     status = main_worker(argc, argv);
5692   }
5693   catch (const std::bad_alloc & /*ex*/) {
5694     // Memory allocation failed (also thrown by std::operator new)
5695     PrintOut(LOG_CRIT, "Smartd: Out of memory\n");
5696     status = EXIT_NOMEM;
5697   }
5698   catch (const std::exception & ex) {
5699     // Other fatal errors
5700     PrintOut(LOG_CRIT, "Smartd: Exception: %s\n", ex.what());
5701     status = EXIT_BADCODE;
5702   }
5703 
5704   // Check for remaining device objects
5705   if (smart_device::get_num_objects() != 0) {
5706     PrintOut(LOG_CRIT, "Smartd: Internal Error: %d device object(s) left at exit.\n",
5707              smart_device::get_num_objects());
5708     status = EXIT_BADCODE;
5709   }
5710 
5711   if (status == EXIT_BADCODE)
5712     PrintOut(LOG_CRIT, "Please inform " PACKAGE_BUGREPORT ", including output of smartd -V.\n");
5713 
5714   notify_exit(status);
5715 #ifdef _WIN32
5716   daemon_winsvc_exitcode = status;
5717 #endif
5718   return status;
5719 }
5720 
5721 
#ifdef _WIN32
// Entry point for native Windows builds.
// Passes control to daemon_main(), which processes the daemon and
// service specific commands and then runs smartd_main() either
// directly, from a new process, or via the service control manager.
int main(int argc, char **argv)
{
  // Registration data for the smartd Windows service
  static const daemon_winsvc_options service_options = {
    // cmd_opt
    "--service",
    // servicename, displayname
    "smartd", "SmartD Service",
    // description
    "Controls and monitors storage devices using the Self-Monitoring, "
    "Analysis and Reporting Technology System (SMART) built into "
    "ATA/SATA and SCSI/SAS hard drives and solid-state drives. "
    "www.smartmontools.org"
  };

  // The value returned by daemon_main() becomes the process exit code
  const int exit_code = daemon_main("smartd", &service_options, smartd_main, argc, argv);
  return exit_code;
}
#endif
5741