#!/bin/bash
# -*- mode: shell-script; sh-basic-offset: 3; sh-indentation: 3 -*-
#
# Init file for EOS watchdog
#

# Timeout, in seconds, handed to `watchdog -o` on every punch.
WATCHDOG_PUNCH_TIMEOUT="30"
# File from which the REDUNDANCY_PROTOCOL=<value> line is read during stop.
REDUNDANCY_FILE='/var/run/redundancy-protocol'

# Re-arm the watchdog and print a timestamped progress marker.
#
# Globals:
#   WATCHDOG_PUNCH_TIMEOUT (read) - timeout in seconds passed to `watchdog -o`.
#   redundancyProtocol     (read) - when it equals "sso" nothing is done.
# Outputs:
#   "[HH:MM:SS] watchdog punch ." terminated by CR LF on stdout (UTC time),
#   emitted only when the watchdog was punched.
punch_watchdog()
{
   # Keep the timestamp local so no helper variable leaks into the caller.
   local stamp

   # set the watchdog to WATCHDOG_PUNCH_TIMEOUT seconds, it gets killed in the
   # reload cli command and we want to make sure that we have enough time to
   # shutdown cleanly. This sets the watchdog on all scds in the system,
   # Linecards will txDisable in WATCHDOG_PUNCH_TIMEOUT seconds.
   if [ "$redundancyProtocol" != "sso" ]; then
      watchdog -o "$WATCHDOG_PUNCH_TIMEOUT"
      # Print a dot (.) with a timestamp after punching the watchdog each time.
      # This will help us track which stage we have crossed.
      stamp=$(date -u +%H:%M:%S)
      # The explicit \r keeps output aligned on a raw console.
      printf '[%s] watchdog punch .\r\n' "$stamp"
   fi
}

# Issue a shutdown command to a TPM 2.0 device before the system resets.
#
# TPM 2.0 are sensitive to unexpected reboot and may increment their lockout
# counter if reset without prior shutdown command.
#
# Reads /sys/class/tpm/tpm0/tpm_version_major and runs
# `tpm2_shutdown --clear --tcti=device` only when that file contains "2" and
# tpm2_shutdown is found on PATH. In every other case it does nothing and
# returns 0.
shutdown_tpm()
{
   local version_file=/sys/class/tpm/tpm0/tpm_version_major
   local tpm_major=""

   [ -f "$version_file" ] || return 0

   # Use read rather than an unquoted $(cat ...) so that an empty or
   # unreadable file yields an empty string instead of a malformed test.
   { read -r tpm_major < "$version_file"; } 2>/dev/null || true

   if [ "$tpm_major" = "2" ] && command -v tpm2_shutdown >/dev/null 2>&1; then
      tpm2_shutdown --clear --tcti=device
   fi
}

# Dispatch on the requested init action ($1):
#   start - no action.
#   stop  - run the reload sequence below: stop the watchdog puncher and a few
#           daemons, record the reload cause, flush and remount flash
#           read-only, shut down the TPM, then call /usr/bin/powercycle.
#           punch_watchdog is called between stages.
#   other - print a usage line and exit with status 1.
case "$1" in
    start)
        # No action on start.
        ;;
    stop)
        # kill the watchdog puncher
        watchdog -k
        # Echo an empty line first
        echo
        echo -e "Restarting system\r"
        pkill netnsd-watcher
        # suspend ProcMgr operation; avoid stoppm as strata stoppm scripts hang
        ProcMgr suspend
        # kill SuperServer so hourly show-tech won't run
        pkill -9 SuperServer
        # NOTE(review): redundancyProtocol is only assigned after this call, so
        # unless it is inherited from the environment the "sso" check inside
        # punch_watchdog cannot match on this first punch - confirm intended.
        punch_watchdog

        # Take the text after '=' on the REDUNDANCY_PROTOCOL line of
        # $REDUNDANCY_FILE; cat errors are discarded, so a missing file
        # leaves the variable empty.
        redundancyProtocol=`cat $REDUNDANCY_FILE 2>/dev/null | grep ^REDUNDANCY_PROTOCOL | cut -d '=' -f 2`

        punch_watchdog

        # sync the hardware clock
        hwclock --systohc
        # Update the modification time of the persisted clock file.
        touch -m /mnt/flash/persist/clock
        # write the reset cause to a file in flash, read by the
        # ScdAgent the next time the system boots.
        mkdir -p /mnt/flash/debug/
        chmod 770 /mnt/flash/debug
        # First write to the deprecated file 'local_reload_cause'. This is
        # warranted to support the downgrade scenario. Only generate a new
        # local cause if none was defined earlier. This allows operations such
        # as FPGA upgrade to define their own causes.
        # The glob matches every file whose name starts with local_reload_cause;
        # a non-zero grep status (no marker found, or no such file) triggers
        # the write below.
        grep "rebootIsImminent" /mnt/flash/debug/local_reload_cause* > /dev/null 2>&1
        if [ $? -ne 0 ]
        then
           description="Reload requested by the user."
           echo -e "${description}\nrebootIsImminent" > \
                 /mnt/flash/debug/local_reload_cause
           writeLocalReloadCause --consume-later --no-sync-sleep "${description}"
        fi
        punch_watchdog

        # Attempt to flush all data to disk and emergency-remount the filesystem to
        # r/o. We first do a sync, then remount, then another sync since the remount
        # is asynchronous.
        
        # The exit status of df is kept in FLASH_IS_VFAT; 0 is treated as
        # "/mnt/flash is VFAT" here and again after the remount.
        df -t vfat /mnt/flash >/dev/null 2>&1
        FLASH_IS_VFAT=$?

        if [ $FLASH_IS_VFAT -eq 0 ];        
        then
           # Take special care for VFAT as its sync does not flush cluster summary.
           # We also need to kill all processes holding open file descriptors on
           # flash before emergency remount as it can cause filesystem inconsistency.
           # In case we need some debugging
           # Debug logging is enabled by the presence of a WATCHDOG_STOP_DEBUG
           # marker file; /mnt/drive takes precedence over /mnt/flash.
           DEBUGLOG=""
           if [ -f /mnt/drive/WATCHDOG_STOP_DEBUG ]; then
              DEBUGLOG="/mnt/drive/WATCHDOG_STOP_DEBUG.log"
           elif [ -f /mnt/flash/WATCHDOG_STOP_DEBUG ]; then
              DEBUGLOG="/mnt/flash/WATCHDOG_STOP_DEBUG.log"
           fi

           if [ -n "$DEBUGLOG" ]; then
              # ${DEBUGLOG%.log} strips the suffix to name the marker file.
              echo -e "WARNING: ${DEBUGLOG%.log} is present, saving debug logs\r"
              # Append: date, files under /mnt/flash changed in the last 15
              # minutes, the process list, and open files on flash and /persist.
              date >>$DEBUGLOG
              echo "--- FILES ---" >>$DEBUGLOG
              find /mnt/flash -cmin -15 -printf "%t %p\n" | sort -nr >>$DEBUGLOG
              echo "--- PROCESSES ---" >>$DEBUGLOG
              ps -elf >>$DEBUGLOG
              echo "--- LSOF ---" >>$DEBUGLOG
              lsof +D /mnt/flash +D /persist >>$DEBUGLOG
              SyncFile $DEBUGLOG

              punch_watchdog
           fi

           # Kill everything having an open handle on /mnt/flash.
           # We have to kill process BEFORE emergency remount per testing.

           # lsof -t prints bare PIDs; $FLASHPROCS is expanded unquoted below
           # so that each PID becomes a separate argument to kill.
           FLASHPROCS=`lsof -t +D /mnt/flash +D /persist`
           if [ -n "$FLASHPROCS" ]; then
              kill -9 $FLASHPROCS 2>/dev/null
           fi
           punch_watchdog

           sync
           
           # VFAT's sync doesn't sync cluster summaries. Trigger it by fsync.
           # This is highly implementation dependent - check fat_file_fsync()
           # in the kernel source, but it hasn't really changed. Doing it twice
           # to make sure it's flushed to block device.
           SyncFile /mnt/flash /mnt/flash
        else
           # otherwise just do a simple sync
           sync
        fi

        punch_watchdog

        # the loop_thread in the kernel would bail out if the backing-file
        # is read-only. This takes care of the race between loop_thread and
        # the remount-ro thread in the kernel.
        if [ -e /mnt/drive/.arista_varlo ]; then
            chmod 0400 /mnt/drive/.arista_varlo
            sleep 1
        fi
        
        # Write 'u' to the sysrq trigger: this is the emergency read-only
        # remount referred to above.
        echo "u" > /proc/sysrq-trigger
        punch_watchdog

        # make sure everything is flushed, because the above line is async
        # this will repeat until it is complete
        # Allow 5 extra seconds so that the loop is killed by hardware watchdog
        # on Arista boxes. This timeout is to prevent white boxes from hanging
        # at reboot time
        remount_timeout=$(( WATCHDOG_PUNCH_TIMEOUT + 5 ))
        # The retry loop runs in a child `sh -c` so that `timeout` can bound it.
        remount_wait="while ! mount -o remount,ro /mnt/flash; do :; done"
        if ! timeout $remount_timeout sh -c "$remount_wait"; then
           echo "Timed out waiting for R/O remount"
        fi
        punch_watchdog
        # current Linux implementation has
        # this backwards... it flushes dirty blocks and then downgrades
        # everyone.... so there is an obvious race here where someone could
        # dirty something after the flush but before the downgrade
        # this sync/sleep should fix this.
        sync
        if [ $FLASH_IS_VFAT -eq 0 ]; then
           # make sure we flush the cluster summary as well
           SyncFile /mnt/flash /mnt/flash
        fi        
        punch_watchdog
        # rumor has it that flash controllers lie, and
        # reads complete before they are truly persistent.
        sleep 1

        shutdown_tpm

        # disable watchdog NMI delivery. do this before scheduling the
        # linecard watchdog and resetting fabrics, or we won't get
        # to powercycle on time.
        # Check if the file exists first to prevent an error message to be
        # displayed on platforms without Scd (whiteboxes).
        if [ -f /sys/class/scd/disable_nmi ]; then
           echo 1 > /sys/class/scd/disable_nmi
        fi
        punch_watchdog

        # Skipped under SSO, matching the check inside punch_watchdog.
        if [ "$redundancyProtocol" != "sso" ]; then
           # reset fabric asics, in order to shutdown their dma
           # engines across a reload. turn on hot reset propagation so
           # the fabric reset occurs during the reload.
           /usr/bin/resetFes

           # set the watchdog to 1 second on linecards, this means
           # linecards will tx disable no more than 1 second after the
           # supervisor goes away.
           watchdog -l 1

           # no more punch_watchdog after this point, as watchdog -o
           # resets a prior watchdog -l
        fi

        # power-cycle the system
        /usr/bin/powercycle
        ;;
    *)
        # Any other action: print usage and fail. Note that the usage text
        # lists only {stop} although "start" is also accepted (as a no-op).
        echo $"Usage: $0 {stop}"
        exit 1
        ;;
esac
