1#+############################################################################## 2# # 3# File: No/Worries/PidFile.pm # 4# # 5# Description: pid file handling without worries # 6# # 7#-############################################################################## 8 9# 10# module definition 11# 12 13package No::Worries::PidFile; 14use strict; 15use warnings; 16our $VERSION = "1.6"; 17our $REVISION = sprintf("%d.%02d", q$Revision: 1.20 $ =~ /(\d+)\.(\d+)/); 18 19# 20# used modules 21# 22 23use Fcntl qw(:DEFAULT :flock :seek); 24use No::Worries qw($_IntegerRegexp $_NumberRegexp); 25use No::Worries::Die qw(dief); 26use No::Worries::Export qw(export_control); 27use No::Worries::Proc qw(proc_terminate); 28use No::Worries::Stat qw(ST_MTIME); 29use Params::Validate qw(validate :types); 30use POSIX qw(:errno_h); 31use Time::HiRes qw(); 32 33# 34# safely read something from an open file 35# 36 37sub _read ($$;$) { 38 my($path, $fh, $noclose) = @_; 39 my($data, $done); 40 41 flock($fh, LOCK_EX) 42 or dief("cannot flock(%s, LOCK_EX): %s", $path, $!); 43 sysseek($fh, 0, SEEK_SET) 44 or dief("cannot sysseek(%s, 0, SEEK_SET): %s", $path, $!); 45 $data = ""; 46 $done = -1; 47 while ($done) { 48 $done = sysread($fh, $data, 16, length($data)); 49 dief("cannot sysread(%s, %d): %s", $path, 16, $!) 50 unless defined($done); 51 } 52 if ($noclose) { 53 flock($fh, LOCK_UN) 54 or dief("cannot flock(%s, LOCK_UN): %s", $path, $!); 55 } else { 56 close($fh) 57 or dief("cannot close(%s): %s", $path, $!); 58 } 59 return($data); 60} 61 62# 63# safely write something to an open file 64# 65 66sub _write ($$$) { 67 my($path, $fh, $data) = @_; 68 my($length, $offset, $done); 69 70 flock($fh, LOCK_EX) 71 or dief("cannot flock(%s, LOCK_EX): %s", $path, $!); 72 sysseek($fh, 0, SEEK_SET) 73 or dief("cannot sysseek(%s, 0, SEEK_SET): %s", $path, $!); 74 truncate($fh, 0) 75 or dief("cannot truncate(%s, 0): %s", $path, $!); 76 $length = length($data); 77 $offset = 0; 78 while ($length) { 79 $done = syswrite($fh, $data, $length, $offset); 80 dief("cannot syswrite(%s, %d): %s", $path, $length, $!) 81 unless defined($done); 82 $length -= $done; 83 $offset += $done; 84 } 85 close($fh) 86 or dief("cannot close(%s): %s", $path, $!); 87} 88 89# 90# check if a process is alive by killing it ;-) 91# 92 93sub _alive ($) { 94 my($pid) = @_; 95 96 return(1) if kill(0, $pid); 97 return(0) if $! == ESRCH; 98 dief("cannot kill(0, %d): %s", $pid, $!); 99} 100 101# 102# kill a process 103# 104 105sub _kill ($$$%) { 106 my($path, $fh, $pid, %option) = @_; 107 my($maxtime); 108 109 # gently 110 $option{callback}->("(pid $pid) is being told to quit..."); 111 _write($path, $fh, "$pid\nquit\n"); 112 $maxtime = Time::HiRes::time() + $option{linger}; 113 while (1) { 114 last unless _alive($pid); 115 last if Time::HiRes::time() > $maxtime; 116 Time::HiRes::sleep(0.1); 117 } 118 if (_alive($pid)) { 119 # forcedly 120 $option{callback}->("(pid $pid) is still running, killing it now..."); 121 if ($option{kill}) { 122 proc_terminate($pid, kill => $option{kill}); 123 } else { 124 proc_terminate($pid); 125 } 126 $option{callback}->("(pid $pid) has been successfully killed"); 127 } else { 128 $option{callback}->("does not seem to be running anymore"); 129 } 130} 131 132# 133# check a process 134# 135 136sub _status ($%) { 137 my($path, %option) = @_; 138 my($fh, @stat, $data, $pid, $status, $message, $lsb); 139 140 $status = 0; 141 unless (sysopen($fh, $path, O_RDWR)) { 142 if ($! == ENOENT) { 143 ($message, $lsb) = 144 ("does not seem to be running (no pid file)", 3); 145 goto done; 146 } 147 dief("cannot sysopen(%s, O_RDWR): %s", $path, $!); 148 } 149 @stat = stat($fh) 150 or dief("cannot stat(%s): %s", $path, $!); 151 $data = _read($path, $fh); 152 if ($data eq "") { 153 # this can happen in pf_set(), between open() and lock() 154 ($message, $lsb) = 155 ("does not seem to be running yet (empty pid file)", 4); 156 goto done; 157 } 158 if ($data =~ /^(\d+)(\s+([a-z]+))?\s*$/s) { 159 $pid = $1; 160 } else { 161 dief("unexpected pid file contents in %s: %s", $path, $data); 162 } 163 unless (_alive($pid)) { 164 ($message, $lsb) = 165 ("(pid $pid) does not seem to be running anymore", 1); 166 goto done; 167 } 168 $data = localtime($stat[ST_MTIME]); 169 if ($option{freshness} and 170 $stat[ST_MTIME] < Time::HiRes::time() - $option{freshness}) { 171 ($message, $lsb) = 172 ("(pid $pid) does not seem to be running anymore since $data", 4); 173 goto done; 174 } 175 # so far so good ;-) 176 ($status, $message, $lsb) = (1, "(pid $pid) was active on $data", 0); 177 done: 178 return($status, $message, $lsb); 179} 180 181# 182# set the pid file 183# 184 185my %pf_set_options = ( 186 pid => { optional => 1, type => SCALAR, regex => $_IntegerRegexp }, 187); 188 189sub pf_set ($@) { 190 my($path, %option, $fh); 191 192 $path = shift(@_); 193 %option = validate(@_, \%pf_set_options) if @_; 194 $option{pid} ||= $$; 195 sysopen($fh, $path, O_WRONLY|O_CREAT|O_EXCL) 196 or dief("cannot sysopen(%s, O_WRONLY|O_CREAT|O_EXCL): %s", $path, $!); 197 _write($path, $fh, "$option{pid}\n"); 198} 199 200# 201# check the pid file 202# 203 204my %pf_check_options = ( 205 pid => { optional => 1, type => SCALAR, regex => $_IntegerRegexp }, 206); 207 208sub pf_check ($@) { 209 my($path, %option, $fh, $data, $pid, $action); 210 211 $path = shift(@_); 212 %option = validate(@_, \%pf_check_options) if @_; 213 $option{pid} ||= $$; 214 sysopen($fh, $path, O_RDWR) 215 or dief("cannot sysopen(%s, O_RDWR): %s", $path, $!); 216 $data = _read($path, $fh); 217 if ($data =~ /^(\d+)\s*$/s) { 218 ($pid, $action) = ($1, ""); 219 } elsif ($data =~ /^(\d+)\s+([a-z]+)\s*$/s) { 220 ($pid, $action) = ($1, $2); 221 } else { 222 dief("unexpected pid file contents in %s: %s", $path, $data) 223 } 224 dief("pid file %s has been taken over by pid %d!", $path, $pid) 225 unless $pid == $option{pid}; 226 return($action); 227} 228 229# 230# touch the pid file 231# 232 233sub pf_touch ($) { 234 my($path) = @_; 235 my($now); 236 237 $now = time(); 238 utime($now, $now, $path) 239 or dief("cannot utime(%d, %d, %s): %s", $now, $now, $path, $!); 240} 241 242# 243# unset the pid file 244# 245 246sub pf_unset ($) { 247 my($path) = @_; 248 249 unless (unlink($path)) { 250 return if $! == ENOENT; 251 dief("cannot unlink(%s): %s", $path, $!); 252 } 253} 254 255# 256# use the pid file to find out the program status 257# 258 259my %pf_status_options = ( 260 freshness => { optional => 1, type => SCALAR, regex => $_NumberRegexp }, 261 timeout => { optional => 1, type => SCALAR, regex => $_NumberRegexp }, 262); 263 264sub pf_status ($@) { 265 my($path, %option, $maxtime, $status, $message, $lsb); 266 267 $path = shift(@_); 268 %option = validate(@_, \%pf_status_options) if @_; 269 if ($option{timeout}) { 270 # check multiple times until success or timeout 271 $maxtime = Time::HiRes::time() + $option{timeout}; 272 while (1) { 273 ($status, $message, $lsb) = _status($path, %option); 274 last if $status or Time::HiRes::time() > $maxtime; 275 Time::HiRes::sleep(0.1); 276 } 277 } else { 278 # check only once 279 ($status, $message, $lsb) = _status($path, %option); 280 } 281 return($status, $message, $lsb) if wantarray(); 282 return($status); 283} 284 285# 286# use the pid file to make the program quit 287# 288 289my %pf_quit_options = ( 290 callback => { optional => 1, type => CODEREF }, 291 linger => { optional => 1, type => SCALAR, regex => $_NumberRegexp }, 292 kill => { optional => 1, type => SCALAR }, 293); 294 295sub pf_quit ($@) { 296 my($path, %option, $fh, $data, $pid); 297 298 $path = shift(@_); 299 %option = validate(@_, \%pf_quit_options) if @_; 300 $option{callback} ||= sub { printf("%s\n", $_[0]) }; 301 $option{linger} ||= 5; 302 unless (sysopen($fh, $path, O_RDWR)) { 303 if ($! == ENOENT) { 304 $option{callback}->("does not seem to be running (no pid file)"); 305 return; 306 } 307 dief("cannot sysopen(%s, O_RDWR): %s", $path, $!); 308 } 309 $data = _read($path, $fh, 1); 310 if ($data eq "") { 311 # this can happen while setting the pid file, between open and lock in pf_set() 312 # but what can we do? we wait a bit, try again and complain if itis still empty 313 sleep(1); 314 $data = _read($path, $fh, 1); 315 } 316 if ($data =~ /^(\d+)(\s+([a-z]+))?\s*$/s) { 317 $pid = $1; 318 } else { 319 dief("unexpected pid file contents in %s: %s", $path, $data); 320 } 321 _kill($path, $fh, $pid, %option); 322 # in any case, we make sure that _this_ pid file does not exist anymore 323 # we have to be extra careful to make sure it is the same pid file 324 unless (sysopen($fh, $path, O_RDWR)) { 325 return if $! == ENOENT; 326 dief("cannot sysopen(%s, O_RDWR): %s", $path, $!); 327 } 328 $data = _read($path, $fh); 329 return if $data eq ""; 330 if ($data =~ /^(\d+)(\s+([a-z]+))?\s*$/s) { 331 return unless $1 == $pid; 332 } else { 333 dief("unexpected pid file contents in %s: %s", $path, $data); 334 } 335 # same pid so assume same pid file... remove it 336 $option{callback}->("removing stale pid file: $path"); 337 unless (unlink($path)) { 338 # take into account a potential race condition... 339 dief("cannot unlink(%s): %s", $path, $!) unless $! == ENOENT; 340 } 341} 342 343# 344# sleep for some time, taking into account an optional pid file 345# 346 347my %pf_sleep_options = ( 348 time => { optional => 1, type => SCALAR, regex => $_NumberRegexp }, 349); 350 351sub pf_sleep ($@) { 352 my($path, %option, $end, $sleep); 353 354 $path = shift(@_); 355 %option = validate(@_, \%pf_sleep_options) if @_; 356 $option{time} = 1 unless defined($option{time}); 357 if ($path) { 358 $end = Time::HiRes::time() + $option{time} if $option{time}; 359 while (1) { 360 return(0) if pf_check($path) eq "quit"; 361 pf_touch($path); 362 last unless $option{time}; 363 $sleep = $end - Time::HiRes::time(); 364 last if $sleep <= 0; 365 $sleep = 1 if $sleep > 1; 366 Time::HiRes::sleep($sleep); 367 } 368 } else { 369 Time::HiRes::sleep($option{time}) if $option{time}; 370 } 371 return(1); 372} 373 374# 375# export control 376# 377 378sub import : method { 379 my($pkg, %exported); 380 381 $pkg = shift(@_); 382 grep($exported{$_}++, map("pf_$_", 383 qw(set check touch unset status quit sleep))); 384 export_control(scalar(caller()), $pkg, \%exported, @_); 385} 386 3871; 388 389__DATA__ 390 391=head1 NAME 392 393No::Worries::PidFile - pid file handling without worries 394 395=head1 SYNOPSIS 396 397 use No::Worries::PidFile qw(*); 398 399 # idiomatic daemon code 400 pf_set($pidfile); 401 while (1) { 402 ... 403 $action = pf_check($pidfile); 404 last if $action eq "quit"; 405 pf_touch($pidfile); 406 ... 407 } 408 pf_unset($pidfile); 409 410 # idiomatic daemon code with sleeping 411 pf_set($pidfile); 412 while (1) { 413 ... 414 pf_sleep($pidfile, time => 5) or last; 415 ... 416 } 417 pf_unset($pidfile); 418 419 # here is how to handle a --status option 420 if ($Option{status}) { 421 ($status, $message, $code) = pf_status($pidfile, freshness => 10); 422 printf("myprog %s\n", $message); 423 exit($code); 424 } 425 426 # here is how to handle a --quit option 427 if ($Option{quit}) { 428 pf_quit($pidfile, 429 linger => 10, 430 callback => sub { printf("myprog %s\n", $_[0]) }, 431 ); 432 } 433 434=head1 DESCRIPTION 435 436This module eases pid file handling by providing high level functions to set, 437check, touch and unset pid files. All the functions die() on error. 438 439The pid file usually contains the process id on a single line, followed by a 440newline. However, it can also be followed by an optional I<action>, also 441followed by a newline. This allows some kind of inter-process communication: a 442process using pf_quit() will append the C<quit> I<action> to the pid file and 443the owning process will detect this via pf_check(). 444 445All the functions properly handle concurrency. For instance, when two 446processes start at the exact same time and call pf_set(), only one will 447succeed and the other one will get an error. 448 449Since an existing pid file will make pf_set() fail, it is very important to 450remove the pid file in all situations, including errors. The recommended way 451to do so is to use an END block: 452 453 # we need to know about transient processes 454 use No::Worries::Proc qw(); 455 # we need to record what needs to be cleaned up 456 our(%NeedsCleanup); 457 # we set the pid file here and remember to clean it up 458 pf_set($pidfile); 459 $NeedsCleanup{pidfile} = 1; 460 # ... anything can happen here ... 461 # cleanup code in an END block 462 END { 463 # transient processes do not need cleanup 464 return if $No::Worries::Proc::Transient; 465 # cleanup the pid file if needed 466 pf_unset($pidfile) if $NeedsCleanup{pidfile}; 467 } 468 469=head1 FUNCTIONS 470 471This module provides the following functions (none of them being exported by 472default): 473 474=over 475 476=item pf_set(PATH[, OPTIONS]) 477 478set the pid file by writing the given pid at the given path; supported 479options: 480 481=over 482 483=item * C<pid>: the pid to use (default: C<$$>) 484 485=back 486 487=item pf_check(PATH[, OPTIONS]) 488 489check the pid file and make sure the given pid is present, also return the 490I<action> in the pid file or the empty string; supported options: 491 492=over 493 494=item * C<pid>: the pid to use (default: C<$$>) 495 496=back 497 498=item pf_unset(PATH) 499 500unset the pid file by removing the given path 501 502=item pf_touch(PATH) 503 504touch the pid file (i.e. update the file modification time) to show that the 505owning process is alive 506 507=item pf_sleep(PATH[, OPTIONS]) 508 509check and touch the pid file and eventually sleep for the givent amount of 510time, returning true if the program should continue or false if it has been 511told to stop via pf_quit(); supported options: 512 513=over 514 515=item * C<time>: the time to sleep (default: 1, can be fractional) 516 517=back 518 519=item pf_status(PATH[, OPTIONS]) 520 521use information from the pid file (including its last modification time) to 522guess the status of the corresponding process, return the status (true means 523that the process seems to be running); in list context, also return an 524informative message and an LSB compatible exit code; supported options: 525 526=over 527 528=item * C<freshness>: maximum age allowed for an active pid file 529 530=item * C<timeout>: check multiple times until success or timeout 531 532=back 533 534=item pf_quit(PATH[, OPTIONS]) 535 536tell the process corresponding to the pid file to quit (setting its I<action> 537to C<quit>), wait a bit to check that it indeed stopped and kill it using 538L<No::Worries::Proc>'s proc_terminate() is everything else fails; supported 539options: 540 541=over 542 543=item * C<callback>: code that will be called with information to report 544 545=item * C<linger>: maximum time to wait after having told the process to quit 546(default: 5) 547 548=item * C<kill>: kill specification to use when killing the process 549 550=back 551 552=back 553 554=head1 SEE ALSO 555 556L<http://refspecs.linuxbase.org/LSB_4.1.0/LSB-Core-generic/LSB-Core-generic/iniscrptact.html>, 557L<No::Worries>, 558L<No::Worries::Proc>. 559 560=head1 AUTHOR 561 562Lionel Cons L<http://cern.ch/lionel.cons> 563 564Copyright (C) CERN 2012-2019 565