Sophie: watchdog-5.6-2mdv2010.0 i586

watchdog-5.6-2mdv2010.0.i586.rpm

#!/bin/sh
# This is a "repair binary" for watchdog that allows the tests to fail N times
# within a given period before a reboot is called.  Note that this "grace
# period" should really be a functionality of watchdog itself, IMHO.
#
# Erik Rossen <rossen@prolibre.com>

# If one does not change the default watchdog loop time of 10 seconds, N=12
# will allow two minutes of failures before a reboot is signaled.
N=12

# CMAXAGE is the age in seconds that the counter file may have before it is
# considered too old and is wiped out.  It has to be larger than the interval
# between two invocations of this script, otherwise the count would restart
# at every failure.
CMAXAGE=20

# Error code handed over by the caller as first argument; it becomes the exit
# status once N failures have been reached.  Fall back to 1 when it is
# missing: a bare "exit" would return the status of the preceding rm
# (normally 0) and the reboot would silently not be signalled.
ERR=${1:-1}

# File holding the number of failures counted so far.
COUNTER=/var/run/watchdog.counter

# Tag used for every syslog message written by this run.
TAG="watchdog[$$]"

# Pick up the previous count, unless the counter file is stale.
I=0
if test -f "$COUNTER"; then
	COUNTERAGE=$(stat -c %Y "$COUNTER")
	NOW=$(date +%s)
	# ${COUNTERAGE:-0}: if stat failed, treat the file as infinitely old.
	if test "$(( ${COUNTERAGE:-0} + $CMAXAGE ))" -lt "$NOW"; then
		# -f: the file may already have vanished since the test above.
		rm -f "$COUNTER"
	else
		I=$(cat "$COUNTER")
	fi
fi

# An empty or corrupt counter file (anything that is not a plain decimal
# number without leading zeros) restarts the count instead of breaking the
# arithmetic and the comparison below.
case "$I" in
	''|*[!0-9]*|0?*) I=0 ;;
esac

I=$(( $I + 1 ))

logger -t "$TAG" "Failure $I of $N"
logger -t "$TAG" "PROCESS LIST:"
ps auxww | logger -t "$TAG"

if test "$I" -ge "$N"; then
	logger -t "$TAG" "Too many failures. Signalling reboot."
	# -f: the counter file may not exist (e.g. N=1 on the first failure).
	rm -f "$COUNTER"
	exit "$ERR"
fi

# Remember the count for the next invocation.  A failed write is reported,
# because without a stored count the threshold N can never be reached.
# NOTE(review): consider exiting with $ERR here instead of 0 — confirm policy.
if ! printf '%s\n' "$I" > "$COUNTER"; then
	logger -t "$TAG" "Cannot update $COUNTER; failure count was not saved"
fi
exit 0