#!@BASH_SHELL@
#
# Copyright 2003-2004, 2006-2013 Red Hat, Inc.
#
# Author(s):
#     Hardy Merrill <hmerrill at redhat.com>
#     Lon Hohberger <lhh at redhat.com>
#     Michael Moon <Michael dot Moon at oracle.com>
#     Ryan McCabe <rmccabe at redhat.com>
#
# This program is Open Source software.  You may modify and/or redistribute
# it persuant to the terms of the Open Software License version 2.1, which
# is available from the following URL and is included herein by reference:
#
# 	http://opensource.org/licenses/osl-2.1.php
#
# NOTES:
#
# (1) You can comment out the LOCKFILE declaration below.  This will prevent
#     the need for this script to access anything outside of the ORACLE_HOME
#     path.
#
# (2) You MUST customize ORACLE_USER, ORACLE_HOME, ORACLE_SID, and
#     ORACLE_HOSTNAME to match your installation if not running from within
#     rgmanager.
#
# (3) Do NOT place this script in shared storage; place it in ORACLE_USER's
#     home directory in non-clustered environments and /usr/share/cluster
#     in rgmanager/Red Hat cluster environments.
#
# Oracle is a registered trademark of Oracle Corporation.
# Oracle9i is a trademark of Oracle Corporation.
# Oracle10g is a trademark of Oracle Corporation.
# Oracle11g is a trademark of Oracle Corporation.
# All other trademarks are property of their respective owners.
#
#
# $Id: oradg.sh 127 2009-08-21 09:17:52Z hevirtan $
#
# Original version is distributed with RHCS. The modifications include
# the following minor changes:
# - Meta-data moved to a dedicated file
# - Support for multiple listeners
# - Disabled EM
# - SysV init support removed. Only usable with rgmanager
#

# Grab the global RHCS helper functions
. $(dirname $0)/ocf-shellfuncs
. $(dirname $0)/utils/config-utils.sh
. $(dirname $0)/utils/messages.sh
. $(dirname $0)/utils/ra-skelet.sh

# Sourced for the "status" helper used by stop_db and get_db_status.
. /etc/init.d/functions

declare SCRIPT="`basename $0`"
declare SCRIPTDIR="`dirname $0`"

# Required parameters from rgmanager
ORACLE_USER=$OCF_RESKEY_user
ORACLE_HOME=$OCF_RESKEY_home
ORACLE_SID=$OCF_RESKEY_name
[ -n "$OCF_RESKEY_tns_admin" ] && export TNS_ADMIN=$OCF_RESKEY_tns_admin

# Optional parameters with default values
LISTENERS=$OCF_RESKEY_listeners
LOCKFILE="$ORACLE_HOME/.orainstance-${ORACLE_SID}.lock"
[ -n "$OCF_RESKEY_vhost" ] && ORACLE_HOSTNAME=$OCF_RESKEY_vhost
[ -n "$OCF_RESKEY_lockfile" ] && LOCKFILE=$OCF_RESKEY_lockfile

export LISTENERS ORACLE_USER ORACLE_HOME ORACLE_SID LOCKFILE ORACLE_HOSTNAME
export LD_LIBRARY_PATH=$ORACLE_HOME/lib
export PATH=$ORACLE_HOME/bin:/bin:/sbin:/usr/bin:/usr/sbin

# Number of stop/start attempts made by the status checks when a component
# is found down.  With 0 no restart is attempted and the check fails at once.
#declare -i RESTART_RETRIES=3
declare -i RESTART_RETRIES=0
declare -r DB_PROCNAMES="pmon"
declare -r LSNR_PROCNAME="tnslsnr"

# clulog will not log messages when run by the oracle user.
# This is a hack to work around that.
if [ "`id -u`" = "`id -u $ORACLE_USER`" ]; then
	# Replacement logger: $1 is the syslog priority, the rest is the message.
	ocf_log() {
		prio=$1
		shift
		logger -i -p daemon."$prio" -- "$*"
	}
fi

#
# Start Oracle (database portion)
#
# Mounts the instance with sqlplus and opens it unless the database role
# is PHYSICAL STANDBY.  The resulting open_mode is spooled to
# ${HA_RSCTMP}/dgstatus.${ORACLE_SID}, which start_oracle reads afterwards.
#
# Returns 0 on success, 1 if sqlplus exits non-zero or its output contains
# "ORA-" (other than ORA-32004) or "failure".
#
start_db() {
	declare -i rv
	declare startup_stdout

	ocf_log info "Starting Oracle DB $ORACLE_SID"

	# Set up our sqlplus script.  Basically, we're trying to
	# capture output in the hopes that it's useful in the case
	# that something doesn't work properly.

	startup_stdout=$(sqlplus "/ as sysdba" << EOF
set serveroutput on
startup mount;

declare
  rol varchar(20);
begin
  select database_role into rol from v\$database;

  dbms_output.put_line('Database role is ' || rol);
  if (rol = 'PHYSICAL STANDBY') then
    return;
  end if;

  execute immediate 'alter database open';
end;
/

select database_role, open_mode from v\$database;
set heading off;
set serveroutput off;
spool ${HA_RSCTMP}/dgstatus.${ORACLE_SID};
select open_mode from v\$database;
spool off;
EOF
)
	rv=$?

	# Data Guard Modification 2 - Remove deprecated parameter error from startup_stdout
	# Done with parameter expansion rather than an unquoted echo so the
	# captured text is never word-split or glob-expanded by the shell.
	startup_stdout=${startup_stdout//ORA-32004/}

	# Dump output to syslog for debugging
	ocf_log debug "[$ORACLE_SID] [$rv] got $startup_stdout"

	if [ $rv -ne 0 ]; then
		ocf_log error "Starting Oracle DB $ORACLE_SID failed, sqlplus returned $rv"
		return 1
	fi

	# If we see:
	# ORA-.....: failure, we failed
	# Troubleshooting:
	#   ORA-00845 - Try rm -f /dev/shm/ora_*
	#   ORA-01081 - Try echo -e 'shutdown abort;\nquit;'|sqlplus "/ as sysdba"
	if [[ "$startup_stdout" =~ "ORA-" ]] || [[ "$startup_stdout" =~ "failure" ]]; then
		ocf_log error "Starting Oracle DB $ORACLE_SID failed, found errors in stdout"
		return 1
	fi

	ocf_log info "Started Oracle DB $ORACLE_SID successfully"
	return 0
}


#
# Stop Oracle (database portion)
#
# $1 - optional shutdown mode passed to "shutdown" (default: immediate)
#
# Returns 0 if the pmon process is already gone or the shutdown succeeded,
# 1 if sqlplus exits non-zero or its output contains "ORA-" or "failure".
#
stop_db() {
	declare stop_cmd
	declare stop_stdout
	declare ora_procname
	declare -i rv
	declare how_shutdown="$1"

	if [ -z "$1" ]; then
		how_shutdown="immediate"
	fi

	ocf_log info "Stopping Oracle DB $ORACLE_SID $how_shutdown"

	ora_procname="ora_${DB_PROCNAMES}_${ORACLE_SID}"
	status $ora_procname
	if [ $? -ne 0 ]; then
		ocf_log debug "no pmon process -- DB $ORACLE_SID already stopped"
		# No pmon process found, db already down
		return 0
	fi

	# Setup for Stop ...
	stop_cmd="set heading off;\nshutdown $how_shutdown;\nquit;\n"
	stop_stdout=$(echo -e "$stop_cmd" | sqlplus -S "/ as sysdba")
	rv=$?

	# Log stdout of the stop command
	ocf_log debug "[$ORACLE_SID] sent stop command $stop_cmd"
	ocf_log debug "[$ORACLE_SID] got $stop_stdout"

	# sqlplus returned failure. We'll return failed to rhcs
	if [ $rv -ne 0 ]; then
		ocf_log error "Stopping Oracle DB $ORACLE_SID failed, sqlplus returned $rv"
		return 1
	fi

	# If we see 'ORA-' or 'failure' in stdout, we're done.
	if [[ "$stop_stdout" =~ "ORA-" ]] || [[ "$stop_stdout" =~ "failure" ]]; then
		ocf_log error "Stopping Oracle DB $ORACLE_SID failed, errors in stdout"
		return 1
	fi

	ocf_log info "Stopped Oracle DB $ORACLE_SID successfully"
	return 0
}


#
# Destroy any remaining processes with refs to $ORACLE_SID
#
# Sends SIGKILL to every process whose "ps ax" line ends in
# ora_<something>_$ORACLE_SID.  Always returns 0; individual kill
# failures are only logged.
#
force_cleanup() {
	declare pids
	declare pid
	declare -i rv

	ocf_log error "Not all Oracle processes for $ORACLE_SID exited cleanly, killing"

	pids=`ps ax | grep "ora_.*_${ORACLE_SID}$" | grep -v grep | awk '{print $1}'`

	for pid in $pids; do
		kill -9 $pid
		rv=$?
		if [ $rv -eq 0 ]; then
			ocf_log info "Cleanup $ORACLE_SID Killed PID $pid"
		else
			ocf_log error "Cleanup $ORACLE_SID Kill PID $pid failed: $rv"
		fi
	done

	return 0
}


#
# Wait for oracle processes to exit.  Time out after 90 seconds, then
# kill whatever is left via force_cleanup.  Always returns 0.
#
exit_idle() {
	declare -i n=0

	ocf_log debug "Waiting for Oracle processes for $ORACLE_SID to terminate..."
	while ps ax | grep "ora_.*_${ORACLE_SID}$" | grep -v grep | grep -q -v $LSNR_PROCNAME; do
		if [ $n -ge 90 ]; then
			ocf_log debug "Timed out while waiting for Oracle processes for $ORACLE_SID to terminate"
			force_cleanup
			return 0
		fi
		sleep 1
		((n++))
	done

	ocf_log debug "All Oracle processes for $ORACLE_SID have terminated"
	return 0
}


#
# Get database background process status.  Restart it if it failed and
# we have seen the lock file.
#
# $1 - subsys_lock: 0 when the service is expected to be running,
#      non-zero when it is expected to be stopped (see status_oracle)
#
# Returns 0 if every process in DB_PROCNAMES is running (possibly after a
# successful restart), 3 if one is down and the service is expected to be
# stopped, 1 if it is down and could not be restarted within
# RESTART_RETRIES attempts.
#
get_db_status() {
	declare -i subsys_lock=$1
	declare -i i=0
	declare -i rv=0
	declare ora_procname
	declare procname

	ocf_log debug "Checking status of DB $ORACLE_SID"

	for procname in $DB_PROCNAMES ; do
		ora_procname="ora_${procname}_${ORACLE_SID}"

		status $ora_procname
		if [ $? -eq 0 ] ; then
			# This one's okay; go to the next one.
			continue
		fi

		# We're not supposed to be running, and we are,
		# in fact, not running...
		if [ $subsys_lock -ne 0 ]; then
			ocf_log debug "DB $ORACLE_SID is already stopped"
			return 3
		fi

		for (( i=$RESTART_RETRIES ; i; i-- )) ; do
			# this db process is down - stop and
			# (re)start all ora_XXXX_$ORACLE_SID processes
			ocf_log info "Restarting Oracle Database $ORACLE_SID"
			stop_db

			start_db
			if [ $? -eq 0 ] ; then
				# ora_XXXX_$ORACLE_SID processes started
				# successfully, so break out of the
				# stop/start # 'for' loop
				ocf_log info "Restarted Oracle DB $ORACLE_SID successfully"
				break
			fi
		done

		# i reaches 0 when every attempt failed, and also when
		# RESTART_RETRIES is 0 and no attempt was made at all.
		if [ $i -eq 0 ]; then
			# stop/start's failed - return 1 (failure)
			ocf_log error "Failed to restart Oracle DB $ORACLE_SID after $RESTART_RETRIES tries"
			return 1
		fi
	done

	ocf_log debug "Checking status of DB $ORACLE_SID success"
	return 0
}


#
# Get the status of the Oracle listener process
#
# $1 - subsys_lock: 0 when the service is expected to be running,
#      non-zero when it is expected to be stopped
# $2 - check depth as passed by status_oracle (not read here)
# $3 - listener name handed to lsnrctl
#
# Returns 0 if the listener is up (possibly after a restart), 3 if it is
# down and expected to be down, 1 if it is down and could not be restarted.
#
get_lsnr_status() {
	declare -i subsys_lock=$1
	declare -i rv
	declare -i i
	declare lsnrctl_stdout
	declare -r LISTENER=$3

	ocf_log debug "Checking status for listener $LISTENER"
	lsnrctl status "$LISTENER" >& /dev/null
	rv=$?
	if [ $rv -eq 0 ] ; then
		ocf_log debug "Listener $LISTENER is up"
		return 0 # Listener is running fine
	fi

	# We're not supposed to be running, and we are,
	# in fact, not running.  Return 3
	if [ $subsys_lock -ne 0 ]; then
		ocf_log debug "Listener $LISTENER is stopped as expected"
		return 3
	fi

	# Listener is NOT running (but should be) - try to restart
	for (( i=$RESTART_RETRIES ; i; i-- )) ; do
		ocf_log info "Listener $LISTENER is down, attempting to restart"
		lsnrctl start "$LISTENER" >& /dev/null
		lsnrctl status "$LISTENER" >& /dev/null
		if [ $? -eq 0 ]; then
			ocf_log info "Listener $LISTENER was restarted successfully"
			break # Listener was (re)started and is running fine
		fi
	done

	if [ $i -eq 0 ]; then
		# stop/start's failed - return 1 (failure)
		ocf_log error "Failed to restart listener $LISTENER after $RESTART_RETRIES tries"
		return 1
	fi

	# Final confirmation, this time keeping the output for the error log.
	lsnrctl_stdout=$(lsnrctl status "$LISTENER")
	rv=$?
	if [ $rv -ne 0 ] ; then
		ocf_log error "Starting listener $LISTENER failed: $rv output $lsnrctl_stdout"
		return 1 # Problem restarting the Listener
	fi

	ocf_log info "Listener $LISTENER started successfully"
	return 0 # Success restarting the Listener
}


#
# Helps us keep a running status so we know what our ultimate return
# code will be.  Returns 1 if the $1 and $2 are not equivalent, otherwise
# returns $1.  The return code is meant to be the next $1 when this is
# called, so, for example:
#
# update_status 0   <-- returns 0
# update_status $? 0 <-- returns 0
# update_status $? 3 <-- returns 1 (values different - error condition)
# update_status $? 1 <-- returns 1 (same, but happen to be error state!)
#
# update_status 3
# update_status $? 3 <-- returns 3
#
# (and so forth...)
#
update_status() {
	declare -i old_status=$1
	declare -i new_status=$2

	if [ -z "$2" ]; then
		return $old_status
	fi

	if [ $old_status -ne $new_status ]; then
		ocf_log error "Error: $old_status vs $new_status for $ORACLE_SID - returning 1"
		return 1
	fi

	return $old_status
}


#
# Print an error message to the user and exit.
#
# $1 - name of the setting or check that failed validation
#
oops() {
	ocf_log error "$ORACLE_SID: Fatal: $1 failed validation checks"
	exit 1
}


#
# Do some validation on the user-configurable stuff at the beginning of the
# script.
#
# Arguments are the script's own command line; when running as root they
# are handed to the re-executed copy of the script.  Exits via oops on any
# failed check.  When invoked as root this function never returns: it
# re-runs the script as ORACLE_USER and exits with that run's status.
#
validation_checks() {
	ocf_log debug "Validating configuration for $ORACLE_SID"

	# If the oracle user doesn't exist, we're done.
	[ -n "$ORACLE_USER" ] || oops "ORACLE_USER"
	id -u $ORACLE_USER > /dev/null || oops "ORACLE_USER"
	id -g $ORACLE_USER > /dev/null || oops "ORACLE_GROUP"

	# If the oracle home is not set, we're done.  Only emptiness is
	# checked: stop_oracle copes with an ORACLE_HOME that is not mounted.
	[ -n "$ORACLE_HOME" ] || oops "ORACLE_HOME"

	# If the oracle SID is NULL, we're done
	[ -n "$ORACLE_SID" ] || oops "ORACLE_SID"

	# Super user?  Automatically change UID and exec as oracle user.
	# Oracle needs to be run as the Oracle user, not root!
	if [ "`id -u`" = "0" ]; then
		su $ORACLE_USER -c "$0 $*"
		exit $?
	fi

	# If we're not root and not the Oracle user, we're done.
	[ "`id -u`" = "`id -u $ORACLE_USER`" ] || oops "not ORACLE_USER after su"
	[ "`id -g`" = "`id -g $ORACLE_USER`" ] || oops "not ORACLE_GROUP after su"

	# Go home.
	cd "$ORACLE_HOME"

	ocf_log debug "Validation checks for $ORACLE_SID succeeded"
	return 0
}


#
# Start Oracle
#
# Starts the database, then every listener in $LISTENERS, then (only when
# ORACLE_HOSTNAME is set and the database was opened READ WRITE) the EM DB
# Console, and finally creates $LOCKFILE.
#
# Returns 0 on success, 1 if the database or any listener failed to start.
# An EM DB Console failure is logged but does not fail the service.
#
start_oracle() {
	declare -i rv
	declare lsnrctl_stdout

	ocf_log info "Starting service $ORACLE_SID"

	start_db
	rv=$?
	if [ $rv -ne 0 ]; then
		ocf_log error "Starting service $ORACLE_SID failed"
		return 1
	fi

	for LISTENER in ${LISTENERS}; do
		ocf_log info "Starting listener $LISTENER"
		lsnrctl_stdout=$(lsnrctl start "$LISTENER")
		rv=$?
		if [ $rv -ne 0 ]; then
			ocf_log debug "[$ORACLE_SID] Listener $LISTENER start returned $rv output $lsnrctl_stdout"
			ocf_log error "Starting service $ORACLE_SID failed"
			return 1
		fi
	done

	if [ -n "$ORACLE_HOSTNAME" -a -s ${HA_RSCTMP}/dgstatus.${ORACLE_SID} ]; then
		# Start DB Console if vhost defined and database_role is READ WRITE
		if cat ${HA_RSCTMP}/dgstatus.${ORACLE_SID} 2>/dev/null | grep "READ WRITE"; then
			ocf_log info "Starting Oracle EM DB Console for $ORACLE_SID"
			emctl start dbconsole
			if [ $? -ne 0 ]; then
				# Console failures are deliberately not fatal.
				# Fall through rather than returning here so the
				# status file is still removed and the lock file
				# is still created for the running database.
				ocf_log error "Oracle EM DB Console startup for $ORACLE_SID failed"
			else
				ocf_log info "Oracle EM DB Console startup for $ORACLE_SID succeeded"
			fi
		fi
		rm -f ${HA_RSCTMP}/dgstatus.${ORACLE_SID}
	fi

	if [ -n "$LOCKFILE" ]; then
		touch "$LOCKFILE"
	fi

	ocf_log info "Starting service $ORACLE_SID completed successfully"
	return 0
}


#
# Stop Oracle
#
# Stops the database (falling back to "shutdown abort"), stops or kills
# every listener in $LISTENERS, stops the EM DB Console when
# ORACLE_HOSTNAME is set, waits for the instance processes to exit and
# removes $LOCKFILE.
#
# Returns 0 on success, or when lsnrctl is not present under ORACLE_HOME;
# returns 1 if the database could not be stopped.
#
stop_oracle() {
	declare -i rv
	declare lsnrctl_stdout
	declare pid

	ocf_log info "Stopping service $ORACLE_SID"

	if ! [ -e "$ORACLE_HOME/bin/lsnrctl" ]; then
		ocf_log error "Oracle Listener Control is not available ($ORACLE_HOME not mounted?)"
		# XXX should this return 1?
		return 0
	fi

	stop_db || stop_db abort
	if [ $? -ne 0 ]; then
		ocf_log error "Unable to stop DB for $ORACLE_SID"
		return 1
	fi

	for LISTENER in ${LISTENERS}; do
		ocf_log info "Stopping listener $LISTENER for $ORACLE_SID"
		lsnrctl_stdout=$(lsnrctl stop "$LISTENER")
		rv=$?
		if [ $rv -ne 0 ]; then
			ocf_log error "Listener $LISTENER stop failed for $ORACLE_SID: $rv output $lsnrctl_stdout"

			pid=`ps ax | grep "tnslsnr $LISTENER " | grep -v grep | awk '{print $1}'`
			if [ -z "$pid" ]; then
				# lsnrctl also fails when the listener is not
				# running at all; there is nothing to kill then.
				ocf_log info "Cleanup $LISTENER: no tnslsnr process found"
				continue
			fi

			# Left unquoted on purpose: more than one PID may match.
			kill -9 $pid
			rv=$?
			if [ $rv -eq 0 ]; then
				ocf_log info "Cleanup $LISTENER Killed PID $pid"
			else
				ocf_log error "Cleanup $LISTENER Kill PID $pid failed: $rv"
			fi
		fi
	done

	if [ -n "$ORACLE_HOSTNAME" ]; then
		# Stop DB Console if vhost defined
		ocf_log info "Stopping Oracle EM DB Console for $ORACLE_SID"
		emctl stop dbconsole
		if [ $? -ne 0 ]; then
			# Console failures are deliberately not fatal.  Fall
			# through rather than returning here so leftover
			# processes are still reaped and the lock file is
			# still removed.
			ocf_log error "Stopping Oracle EM DB Console for $ORACLE_SID failed"
		else
			ocf_log info "Stopping Oracle EM DB Console for $ORACLE_SID succeeded"
		fi
	fi

	exit_idle

	if [ $? -ne 0 ]; then
		ocf_log error "WARNING: Not all Oracle processes exited cleanly for $ORACLE_SID"
		# XXX - failure?
	fi

	if [ -n "$LOCKFILE" ]; then
		rm -f "$LOCKFILE"
	fi

	ocf_log info "Stopping service $ORACLE_SID succeeded"
	return 0
}


#
# Find and display the status of the service.
#
# This has two parts:
#   (1) Oracle database itself
#   (2) Each Oracle listener process in $LISTENERS
#
# $1 - check depth (OCF_CHECK_LEVEL); logged and passed on to the checks
#
# - If all are (cleanly) down, we return 3.  In order for this to happen,
#   $LOCKFILE must be configured and must not exist.
#
# - If $LOCKFILE exists (or is not configured) and a part is down, a restart
#   is attempted up to RESTART_RETRIES times; if that fails we return 1.
#
# - If the parts disagree (some up, some down), we return 1 (ERROR)
#
# - If all are running, return 0.  In the "all-running" case, we recreate
#   $LOCKFILE if it does not exist.
#
status_oracle() {
	declare -i subsys_lock=1
	declare -i last
	declare -i depth=$1

	ocf_log debug "Checking status for $ORACLE_SID depth $depth"

	# Check for lock file. Crude and rudimentary, but it works
	if [ -z "$LOCKFILE" ] || [ -f "$LOCKFILE" ]; then
		subsys_lock=0
	fi

	# Check database status
	get_db_status $subsys_lock $depth
	update_status $? # Start
	last=$?

	# Check & report listener status
	for LISTENER in ${LISTENERS}; do
		get_lsnr_status $subsys_lock $depth "$LISTENER"
		update_status $? $last
		last=$?
	done

	# No status for DB Console (ORACLE_HOSTNAME)

	# No lock file, but everything's running.  Put the lock
	# file back.  XXX - this kosher?
	if [ $last -eq 0 ] && [ $subsys_lock -ne 0 ]; then
		touch "$LOCKFILE"
	fi

	ocf_log debug "Status returning $last for $ORACLE_SID"
	return $last
}


########################
# Do some real work... #
########################

# Data Guard Modification 1 - Debug Logging
# For the operational actions, trace execution and dump the invocation
# and environment.  When running as root, stdout and stderr are first
# redirected to a per-action log file under ${HA_RSCTMP}.
case $1 in
stop | start | status | restart | recover | monitor )
	[ $(id -u) = 0 ] && exec > "${HA_RSCTMP}/oradg_${ORACLE_SID}_$1.log" 2>&1
	set -x
	date
	echo $@
	printenv
	;;
esac

case $1 in
	meta-data)
		# The metadata lives next to this script in <name>.metadata
		cat `echo $0 | sed 's/^\(.*\)\.sh$/\1.metadata/'`
		exit 0
		;;
	start)
		validation_checks $*
		start_oracle
		exit $?
		;;
	stop)
		validation_checks $*
		stop_oracle
		exit $?
		;;
	status|monitor)
		validation_checks $*
		status_oracle $OCF_CHECK_LEVEL
		exit $?
		;;
	restart)
		$0 stop || exit $?
		$0 start || exit $?
		exit 0
		;;
	*)
		echo "usage: $SCRIPT {start|stop|restart|status|monitor|meta-data}"
		exit 1
		;;
esac

exit 0