1#!@BASH_SHELL@
2#
3# Copyright 2003-2004, 2006-2013 Red Hat, Inc.
4#
5# Author(s):
6#     Hardy Merrill <hmerrill at redhat.com>
7#     Lon Hohberger <lhh at redhat.com>
8#     Michael Moon <Michael dot Moon at oracle.com>
9#     Ryan McCabe <rmccabe at redhat.com>
10#
11# This program is Open Source software.  You may modify and/or redistribute
12# it pursuant to the terms of the Open Software License version 2.1, which
13# is available from the following URL and is included herein by reference:
14#
15# 	http://opensource.org/licenses/osl-2.1.php
16#
17# NOTES:
18#
19# (1) You can comment out the LOCKFILE declaration below.  This will prevent
20# the need for this script to access anything outside of the ORACLE_HOME
21# path.
22#
23# (2) You MUST customize ORACLE_USER, ORACLE_HOME, ORACLE_SID, and
24# ORACLE_HOSTNAME to match your installation if not running from within
25# rgmanager.
26#
27# (3) Do NOT place this script in shared storage; place it in ORACLE_USER's
28# home directory in non-clustered environments and /usr/share/cluster
29# in rgmanager/Red Hat cluster environments.
30#
31# Oracle is a registered trademark of Oracle Corporation.
32# Oracle9i is a trademark of Oracle Corporation.
33# Oracle10g is a trademark of Oracle Corporation.
34# Oracle11g is a trademark of Oracle Corporation.
35# All other trademarks are property of their respective owners.
36#
37#
38# $Id: oradg.sh 127 2009-08-21 09:17:52Z hevirtan $
39#
40# Original version is distributed with RHCS. The modifications include
41# the following minor changes:
42# - Meta-data moved to a dedicated file
43# - Support for multiple listeners
44# - Disabled EM
45# - SysV init support removed. Only usable with rgmanager
46#
47
# Grab the global RHCS helper functions.  They are looked up next to this
# script; the expansions are quoted so that an installation path containing
# whitespace or glob characters still resolves to the intended files.
. "$(dirname "$0")/ocf-shellfuncs"
. "$(dirname "$0")/utils/config-utils.sh"
. "$(dirname "$0")/utils/messages.sh"
. "$(dirname "$0")/utils/ra-skelet.sh"

# SysV init helper library (presumably the source of the status() call used
# by the database checks in this script -- verify on the target distribution).
. /etc/init.d/functions

# Name and directory of this script, as invoked.
declare SCRIPT="$(basename "$0")"
declare SCRIPTDIR="$(dirname "$0")"
58
# Required parameters, handed over by rgmanager as OCF_RESKEY_* variables
export ORACLE_USER="$OCF_RESKEY_user"
export ORACLE_HOME="$OCF_RESKEY_home"
export ORACLE_SID="$OCF_RESKEY_name"
if [ -n "$OCF_RESKEY_tns_admin" ]; then
	export TNS_ADMIN="$OCF_RESKEY_tns_admin"
fi

# Optional parameters with default values.  The lock file defaults to a
# per-SID dot file inside ORACLE_HOME; ORACLE_HOSTNAME keeps whatever value
# it already had when no vhost is configured.
export LISTENERS="$OCF_RESKEY_listeners"
export LOCKFILE="${OCF_RESKEY_lockfile:-$ORACLE_HOME/.orainstance-${ORACLE_SID}.lock}"
if [ -n "$OCF_RESKEY_vhost" ]; then
	ORACLE_HOSTNAME="$OCF_RESKEY_vhost"
fi
export ORACLE_HOSTNAME

# Oracle tools and libraries come first; the rest of PATH is fixed.
export LD_LIBRARY_PATH="$ORACLE_HOME/lib"
export PATH="$ORACLE_HOME/bin:/bin:/sbin:/usr/bin:/usr/sbin"

# Number of in-place restart attempts made by the status checks.
# A previous setting was:
#declare -i	RESTART_RETRIES=3
declare -i	RESTART_RETRIES=0
# Background process name(s) checked for the database, and the listener's
# process name.
declare -r	DB_PROCNAMES="pmon"
declare -r	LSNR_PROCNAME="tnslsnr"
79
# clulog does not log messages when it is run by the oracle user.
# Work around that: when we already run with the Oracle user's uid, replace
# ocf_log with a wrapper around logger.  The first argument is the syslog
# priority (facility "daemon"), all remaining arguments form the message.
if [[ "$(id -u)" == "$(id -u $ORACLE_USER)" ]]; then
	ocf_log() {
		prio=$1; shift
		logger -i -p "daemon.${prio}" -- "$*"
	}
fi
89
#
# Start Oracle (database portion)
#
# Feeds a script to sqlplus that mounts the instance and then opens it,
# unless v$database reports the PHYSICAL STANDBY role, in which case the
# database is left mounted.  The final open_mode is spooled to
# ${HA_RSCTMP}/dgstatus.${ORACLE_SID}.
#
# Returns 0 on success; 1 when sqlplus exits non-zero or when its output
# contains "ORA-" (ORA-32004 excepted) or "failure".
#
start_db() {
	declare -i rv
	declare startup_cmd
	declare startup_stdout

	ocf_log info "Starting Oracle DB $ORACLE_SID"

	# Set up our sqlplus script.  Basically, we're trying to
	# capture output in the hopes that it's useful in the case
	# that something doesn't work properly.  The script is kept in a
	# variable so that it can also be written to the debug log.
	startup_cmd="set serveroutput on
startup mount;

declare
  rol varchar(20);
begin
  select database_role into rol from v\$database;

  dbms_output.put_line('Database role is ' || rol);
  if (rol = 'PHYSICAL STANDBY') then
    return;
  end if;

  execute immediate 'alter database open';
end;
/

select database_role, open_mode from v\$database;
set heading off;
set serveroutput off;
spool ${HA_RSCTMP}/dgstatus.${ORACLE_SID};
select open_mode from v\$database;
spool off;"

	startup_stdout=$(sqlplus "/ as sysdba" <<< "$startup_cmd")
	rv=$?

	# Data Guard Modification 2 - Remove deprecated parameter error from
	# startup_stdout.  This is done with a parameter expansion rather than
	# an unquoted echo: sqlplus marks error positions with a lone "*",
	# which an unquoted echo would expand to the file names of the current
	# directory and so could inject "failure" or "ORA-" into the text.
	startup_stdout=${startup_stdout//ORA-32004/}

	# Dump output to syslog for debugging
	ocf_log debug "[$ORACLE_SID] [$rv] sent $startup_cmd"
	ocf_log debug "[$ORACLE_SID] [$rv] got $startup_stdout"

	if [ $rv -ne 0 ]; then
		ocf_log error "Starting Oracle DB $ORACLE_SID failed, sqlplus returned $rv"
		return 1
	fi

	# If we see:
	# ORA-.....: failure, we failed
	# Troubleshooting:
	#   ORA-00845 - Try rm -f /dev/shm/ora_*
	#   ORA-01081 - Try echo -e 'shutdown abort;\nquit;'|sqlplus "/ as sysdba"
	if [[ "$startup_stdout" == *"ORA-"* || "$startup_stdout" == *"failure"* ]]; then
		ocf_log error "Starting Oracle DB $ORACLE_SID failed, found errors in stdout"
		return 1
	fi

	ocf_log info "Started Oracle DB $ORACLE_SID successfully"
	return 0
}
157
158
#
# Stop Oracle (database portion)
#
# $1 - optional shutdown mode passed to the sqlplus "shutdown" command
#      (defaults to "immediate")
#
# Returns 0 when no pmon process is found for $ORACLE_SID or when the
# shutdown succeeded; 1 when sqlplus exits non-zero or prints "ORA-" or
# "failure".
#
stop_db() {
	declare how_shutdown="${1:-immediate}"
	declare stop_cmd
	declare stop_stdout
	declare -i rv

	ocf_log info "Stopping Oracle DB $ORACLE_SID $how_shutdown"

	# No pmon process found means the db is already down
	ora_procname="ora_${DB_PROCNAMES}_${ORACLE_SID}"
	if ! status $ora_procname; then
		ocf_log debug "no pmon process -- DB $ORACLE_SID already stopped"
		return 0
	fi

	# The command string keeps its escapes literal (this is what gets
	# logged); they are expanded only on the way into sqlplus.
	stop_cmd="set heading off;\nshutdown $how_shutdown;\nquit;\n"
	stop_stdout=$(printf '%b\n' "$stop_cmd" | sqlplus -S "/ as sysdba")
	rv=$?

	# Log stdout of the stop command
	ocf_log debug "[$ORACLE_SID] sent stop command $stop_cmd"
	ocf_log debug "[$ORACLE_SID] got $stop_stdout"

	# sqlplus returned failure. We'll return failed to rhcs
	if (( rv != 0 )); then
		ocf_log error "Stopping Oracle DB $ORACLE_SID failed, sqlplus returned $rv"
		return 1
	fi

	# 'ORA-' or 'failure' anywhere in stdout also counts as a failure.
	case "$stop_stdout" in
	*ORA-*|*failure*)
		ocf_log error "Stopping Oracle DB $ORACLE_SID failed, errors in stdout"
		return 1
		;;
	esac

	ocf_log info "Stopped Oracle DB $ORACLE_SID successfully"
	return 0
}
206
207
#
# Destroy any remaining processes with refs to $ORACLE_SID
#
# Every process whose "ps ax" line ends in ora_<something>_$ORACLE_SID is
# sent SIGKILL; the outcome of each kill is logged.  Always returns 0.
#
force_cleanup() {
	declare pids
	declare pid

	ocf_log error "Not all Oracle processes for $ORACLE_SID exited cleanly, killing"

	pids=$(ps ax | grep "ora_.*_${ORACLE_SID}$" | grep -v grep | awk '{print $1}')

	for pid in $pids; do
		kill -9 $pid
		rv=$?
		case $rv in
		0)
			ocf_log info "Cleanup $ORACLE_SID Killed PID $pid"
			;;
		*)
			ocf_log error "Cleanup $ORACLE_SID Kill PID $pid failed: $rv"
			;;
		esac
	done

	return 0
}
231
232
#
# Wait for oracle processes to exit.
#
# $1 - optional timeout in seconds (default: 90)
#
# Polls "ps ax" once per second for processes whose line ends in
# ora_<something>_$ORACLE_SID (listener processes excluded).  When the
# timeout is reached while such processes still exist, force_cleanup is
# called to kill them.  Always returns 0.
#
exit_idle() {
	declare -i timeout=${1:-90}
	declare -i n=0

	ocf_log debug "Waiting for Oracle processes for $ORACLE_SID to terminate..."
	while ps ax | grep "ora_.*_${ORACLE_SID}$" | grep -v grep | grep -q -v "$LSNR_PROCNAME"; do
		if [ $n -ge $timeout ]; then
			ocf_log debug "Timed out while waiting for Oracle processes for $ORACLE_SID to terminate"
			force_cleanup
			return 0
		fi
		sleep 1
		((n++))
	done

	ocf_log debug "All Oracle processes for $ORACLE_SID have terminated"
	return 0
}
253
254
#
# Get database background process status.  Restart it if it failed and
# we have seen the lock file.
#
# $1 - subsys_lock: non-zero when the service is not expected to be running
# $2 - accepted (the caller passes the check depth) but not used here
#
# Returns 0 when every process in $DB_PROCNAMES is running (possibly after a
# successful restart), 3 when a process is down and subsys_lock is non-zero,
# 1 when a process is down and could not be restarted within
# $RESTART_RETRIES attempts.
#
get_db_status() {
	declare -i subsys_lock=$1
	declare -i i=0
	declare -i rv=0
	declare ora_procname

	ocf_log debug "Checking status of DB $ORACLE_SID"

	for procname in $DB_PROCNAMES ; do
		ora_procname="ora_${procname}_${ORACLE_SID}"

		# This one's okay; go to the next one.
		status $ora_procname && continue

		# We're not supposed to be running, and we are,
		# in fact, not running...
		if (( subsys_lock != 0 )); then
			ocf_log debug "DB $ORACLE_SID is already stopped"
			return 3
		fi

		# This db process is down - stop and (re)start all
		# ora_XXXX_$ORACLE_SID processes.  The counter stays non-zero
		# only when one of the attempts succeeded.
		i=$RESTART_RETRIES
		while (( i != 0 )); do
			ocf_log info "Restarting Oracle Database $ORACLE_SID"
			stop_db

			if start_db; then
				ocf_log info "Restarted Oracle DB $ORACLE_SID successfully"
				break
			fi
			(( i-- ))
		done

		if (( i == 0 )); then
			# stop/start's failed - return 1 (failure)
			ocf_log error "Failed to restart Oracle DB $ORACLE_SID after $RESTART_RETRIES tries"
			return 1
		fi
	done

	ocf_log debug "Checking status of DB $ORACLE_SID success"
	return 0
}
309
310
#
# Get the status of the Oracle listener process
#
# $1 - subsys_lock: non-zero when the service is not expected to be running
# $2 - accepted (the caller passes the check depth) but not used here
# $3 - name of the listener to check
#
# Returns 0 when "lsnrctl status" succeeds (possibly after a restart), 3
# when the listener is down and subsys_lock is non-zero, 1 when it is down
# and could not be restarted within $RESTART_RETRIES attempts.
#
get_lsnr_status() {
	declare -i subsys_lock=$1
	declare -i rv
	declare -r LISTENER=$3

	ocf_log debug "Checking status for listener $LISTENER"
	lsnrctl status "$LISTENER" > /dev/null 2>&1
	rv=$?
	if (( rv == 0 )); then
		ocf_log debug "Listener $LISTENER is up"
		return 0 # Listener is running fine
	fi

	# We're not supposed to be running, and we are,
	# in fact, not running.  Return 3
	if (( subsys_lock != 0 )); then
		ocf_log debug "Listener $LISTENER is stopped as expected"
		return 3
	fi

	# Listener is NOT running (but should be) - try to restart.  The
	# counter stays non-zero only when one of the attempts succeeded.
	i=$(( RESTART_RETRIES ))
	while (( i != 0 )); do
		ocf_log info "Listener $LISTENER is down, attempting to restart"
		lsnrctl start "$LISTENER" > /dev/null 2>&1
		if lsnrctl status "$LISTENER" > /dev/null 2>&1; then
			ocf_log info "Listener $LISTENER was restarted successfully"
			break # Listener was (re)started and is running fine
		fi
		(( i-- ))
	done

	if (( i == 0 )); then
		# stop/start's failed - return 1 (failure)
		ocf_log error "Failed to restart listener $LISTENER after $RESTART_RETRIES tries"
		return 1
	fi

	# Final confirmation, this time keeping the output for the log.
	lsnrctl_stdout=$(lsnrctl status "$LISTENER")
	rv=$?
	if (( rv != 0 )); then
		ocf_log error "Starting listener $LISTENER failed: $rv output $lsnrctl_stdout"
		return 1 # Problem restarting the Listener
	fi

	ocf_log info "Listener $LISTENER started successfully"
	return 0 # Success restarting the Listener
}
361
362
#
# Keeps a running status so that we know what our ultimate return code
# will be.
#
# $1 - one status value
# $2 - optional second status value to compare $1 against
#
# Returns $1 when $2 is missing or numerically equal to $1; returns 1 (and
# logs an error) when the two differ.  The return code is meant to be fed
# back in on the next call, for example:
#
# update_status 0   <-- returns 0
# update_status $? 0 <-- returns 0
# update_status $? 3 <-- returns 1 (values different - error condition)
# update_status $? 1 <-- returns 1 (same, but happen to be error state!)
#
# update_status 3
# update_status $? 3 <-- returns 3
#
# (and so forth...)
#
update_status() {
	declare -i old_status=$1
	declare -i new_status=$2

	# Only one value supplied: there is nothing to compare against yet.
	[ -n "$2" ] || return $old_status

	if (( old_status == new_status )); then
		return $old_status
	fi

	ocf_log error "Error: $old_status vs $new_status for $ORACLE_SID - returning 1"
	return 1
}
394
395
#
# Log a fatal validation error and terminate the script.
#
# $1 - name of the setting (or condition) that failed validation
#
# Does not return: exits with status 1.
#
oops() {
	declare what=$1

	ocf_log error "$ORACLE_SID: Fatal: $what failed validation checks"
	exit 1
}
403
404
#
# Do some validation on the user-configurable stuff at the beginning of the
# script.
#
# Arguments: the script's own arguments; they are only used to re-run the
# script as $ORACLE_USER when the caller is root.
#
# Returns 0 when running as $ORACLE_USER with a sane configuration.  Any
# failed check ends the script through oops.  When called as root the script
# is re-run through su and this process exits with that run's status.
#
validation_checks() {
	ocf_log debug "Validating configuration for $ORACLE_SID"

	# The oracle user must be configured and must resolve to a uid and
	# a gid, otherwise we're done.
	if [ -z "$ORACLE_USER" ]; then
		oops "ORACLE_USER"
	fi
	if ! id -u $ORACLE_USER > /dev/null; then
		oops "ORACLE_USER"
	fi
	if ! id -g $ORACLE_USER > /dev/null; then
		oops "ORACLE_GROUP"
	fi

	# If the oracle home is empty, we're done
	if [ -z "$ORACLE_HOME" ]; then
		oops "ORACLE_HOME"
	fi

	# If the oracle SID is empty, we're done
	if [ -z "$ORACLE_SID" ]; then
		oops "ORACLE_SID"
	fi

	# Super user? Automatically change UID and exec as oracle user.
	# Oracle needs to be run as the Oracle user, not root!
	if [ "$(id -u)" = "0" ]; then
		su $ORACLE_USER -c "$0 $*"
		exit $?
	fi

	# If we're not root and not the Oracle user, we're done.
	if [ "$(id -u)" != "$(id -u $ORACLE_USER)" ]; then
		oops "not ORACLE_USER after su"
	fi
	if [ "$(id -g)" != "$(id -g $ORACLE_USER)" ]; then
		oops "not ORACLE_GROUP after su"
	fi

	# Go home.
	cd "$ORACLE_HOME"

	ocf_log debug "Validation checks for $ORACLE_SID succeeded"
	return 0
}
440
441
#
# Start Oracle
#
# Starts the database (start_db), then every listener named in $LISTENERS.
# When $ORACLE_HOSTNAME is set and the open mode spooled by start_db to
# ${HA_RSCTMP}/dgstatus.${ORACLE_SID} contains "READ WRITE", the EM DB
# Console is started as well; the spool file is removed afterwards.
# Finally $LOCKFILE (if set) is created.
#
# Returns 0 on success, 1 when the database or a listener failed to start.
# A DB Console start failure is logged but does not fail the service.
#
start_oracle() {
	declare -i rv
	declare lsnrctl_stdout
	declare dgstatus="${HA_RSCTMP}/dgstatus.${ORACLE_SID}"

	ocf_log info "Starting service $ORACLE_SID"

	start_db
	rv=$?
	if [ $rv -ne 0 ]; then
		ocf_log error "Starting service $ORACLE_SID failed"
		return 1
	fi

	# Word splitting is intended: LISTENERS is a space separated list.
	for LISTENER in ${LISTENERS}; do
		ocf_log info "Starting listener $LISTENER"
		lsnrctl_stdout=$(lsnrctl start "$LISTENER")
		rv=$?
		if [ $rv -ne 0 ]; then
			ocf_log debug "[$ORACLE_SID] Listener $LISTENER start returned $rv output $lsnrctl_stdout"
			ocf_log error "Starting service $ORACLE_SID failed"
			return 1
		fi
	done

	if [ -n "$ORACLE_HOSTNAME" ] && [ -s "$dgstatus" ]; then
		# Start DB Console if vhost defined and open_mode is READ WRITE
		if grep "READ WRITE" "$dgstatus" 2>/dev/null; then
			ocf_log info "Starting Oracle EM DB Console for $ORACLE_SID"
			if emctl start dbconsole; then
				ocf_log info "Oracle EM DB Console startup for $ORACLE_SID succeeded"
			else
				# The console is not essential: keep the good
				# return status, but fall through instead of
				# returning here so that the spool file is
				# still removed and the lock file still created.
				ocf_log error "Oracle EM DB Console startup for $ORACLE_SID failed"
			fi
		fi
		rm -f "$dgstatus"
	fi

	if [ -n "$LOCKFILE" ]; then
		touch "$LOCKFILE"
	fi

	ocf_log info "Starting service $ORACLE_SID completed successfully"
	return 0
}
491
492
#
# Stop Oracle
#
# Stops the database ("shutdown immediate", then "shutdown abort" if that
# fails), stops every listener in $LISTENERS (killing the listener process
# when "lsnrctl stop" fails), stops the EM DB Console when $ORACLE_HOSTNAME
# is set, waits for the remaining Oracle processes to exit and removes
# $LOCKFILE.
#
# Returns 1 when the database could not be stopped, 0 otherwise -- including
# the case where lsnrctl is not available under $ORACLE_HOME.  Listener and
# DB Console stop failures are logged but do not fail the stop.
#
stop_oracle() {
	declare -i rv
	declare lsnrctl_stdout
	declare pids

	ocf_log info "Stopping service $ORACLE_SID"

	if ! [ -e "$ORACLE_HOME/bin/lsnrctl" ]; then
		ocf_log error "Oracle Listener Control is not available ($ORACLE_HOME not mounted?)"
		# XXX should this return 1?
		return 0
	fi

	if ! { stop_db || stop_db abort; }; then
		ocf_log error "Unable to stop DB for $ORACLE_SID"
		return 1
	fi

	# Word splitting is intended: LISTENERS is a space separated list.
	for LISTENER in ${LISTENERS}; do
		ocf_log info "Stopping listener $LISTENER for $ORACLE_SID"
		lsnrctl_stdout=$(lsnrctl stop "$LISTENER")
		rv=$?
		if [ $rv -eq 0 ]; then
			continue
		fi
		ocf_log error "Listener $LISTENER stop failed for $ORACLE_SID: $rv output $lsnrctl_stdout"

		pids=$(ps ax | grep "tnslsnr $LISTENER " | grep -v grep | awk '{print $1}')
		if [ -z "$pids" ]; then
			# Nothing to kill (the listener was probably not running
			# in the first place); do not call kill without a PID.
			ocf_log info "Cleanup $LISTENER found no listener process to kill"
			continue
		fi

		# Word splitting is intended: one argument per PID.
		kill -9 $pids
		rv=$?
		if [ $rv -eq 0 ]; then
			ocf_log info "Cleanup $LISTENER Killed PID ${pids//$'\n'/ }"
		else
			ocf_log error "Cleanup $LISTENER Kill PID ${pids//$'\n'/ } failed: $rv"
		fi
	done

	if [ -n "$ORACLE_HOSTNAME" ]; then
		# Stop DB Console if vhost defined
		ocf_log info "Stopping Oracle EM DB Console for $ORACLE_SID"
		if emctl stop dbconsole; then
			ocf_log info "Stopping Oracle EM DB Console for $ORACLE_SID succeeded"
		else
			# The console is not essential: keep the good return
			# status, but fall through instead of returning here so
			# that leftover processes are still reaped and the lock
			# file is still removed.
			ocf_log error "Stopping Oracle EM DB Console for $ORACLE_SID failed"
		fi
	fi

	if ! exit_idle; then
		ocf_log error "WARNING: Not all Oracle processes exited cleanly for $ORACLE_SID"
		# XXX - failure?
	fi

	if [ -n "$LOCKFILE" ]; then
		rm -f "$LOCKFILE"
	fi

	ocf_log info "Stopping service $ORACLE_SID succeeded"
	return 0
}
558
559
#
# Find and report the status of the service.
#
# This has two parts:
# (1) Oracle database itself (get_db_status)
# (2) every Oracle listener named in $LISTENERS (get_lsnr_status)
#
# $1 - check depth; passed on to the two helpers
#
# The service counts as "expected to be running" when $LOCKFILE is empty or
# the lock file exists.  The individual results are folded together with
# update_status, so:
#
# - If all parts report 0, we return 0.  In that case $LOCKFILE is
# recreated if it was configured but missing.
#
# - If all parts report the same other value (for example 3, cleanly
# stopped), that value is returned.
#
# - If the parts disagree, we return 1 (ERROR).
#
status_oracle() {
	declare -i subsys_lock=1
	declare -i last
	declare -i depth=$1

	ocf_log debug "Checking status for $ORACLE_SID depth $depth"

	# Check for lock file. Crude and rudimentary, but it works
	if [[ -z "$LOCKFILE" || -f "$LOCKFILE" ]]; then
		subsys_lock=0
	fi

	# Check database status; its result seeds the running status
	get_db_status $subsys_lock $depth
	update_status $?
	last=$?

	# Check & report listener status
	for LISTENER in ${LISTENERS}; do
		get_lsnr_status $subsys_lock $depth "$LISTENER"
		update_status $? $last
		last=$?
	done

	# No status for DB Console (ORACLE_HOSTNAME)

	# No lock file, but everything's running.  Put the lock
	# file back. XXX - this kosher?
	if (( last == 0 && subsys_lock != 0 )); then
		touch "$LOCKFILE"
	fi

	ocf_log debug "Status returning $last for $ORACLE_SID"
	return $last
}
613
614
615########################
616# Do some real work... #
617########################
618
# Data Guard Modification 1 - Debug Logging
#
# For the actions listed in the pattern below, trace the whole run: shell
# tracing is switched on and the date, the script arguments and the complete
# environment are printed.  When the script runs as root, stdout and stderr
# are first redirected, for the remainder of the script, to
# ${HA_RSCTMP}/oradg_${ORACLE_SID}_<action>.log.
# NOTE(review): printenv writes every environment variable to that output --
# confirm that nothing sensitive is passed in through the environment.
# NOTE(review): "restart" and "recover" are traced here; whether each of them
# is also handled by the action dispatch that follows should be verified.
case $1 in
stop | start | status | restart | recover | monitor )
# Only root redirects; presumably the copy of this script that is re-run
# through su inherits the already redirected descriptors -- TODO confirm.
[ $(id -u) = 0 ] && exec > "${HA_RSCTMP}/oradg_${ORACLE_SID}_$1.log" 2>&1
set -x
date
echo $@
printenv
esac
628
# Dispatch on the requested action.  Every arm ends the script with the
# status of the action it ran.
case $1 in
	status|monitor)
		validation_checks $*
		status_oracle $OCF_CHECK_LEVEL
		exit $?
		;;
	start|stop)
		# Runs start_oracle or stop_oracle respectively.
		validation_checks $*
		${1}_oracle
		exit $?
		;;
	restart)
		# Re-invoke ourselves; give up with the failing step's status.
		$0 stop
		rc=$?
		if [ $rc -ne 0 ]; then
			exit $rc
		fi
		$0 start
		rc=$?
		if [ $rc -ne 0 ]; then
			exit $rc
		fi
		exit 0
		;;
	meta-data)
		# The meta-data lives in a file named like this script, with
		# the .sh suffix replaced by .metadata.
		cat $(echo $0 | sed 's/^\(.*\)\.sh$/\1.metadata/')
		exit 0
		;;
	*)
		echo "usage: $SCRIPT {start|stop|restart|status|monitor|meta-data}"
		exit 1
		;;
esac

exit 0
661