#!/usr/bin/env python3
# Copyright (c) 2017 Arista Networks, Inc.  All rights reserved.
# Arista Networks, Inc. Confidential and Proprietary.

# pylint: disable=consider-using-f-string

from CliPlugin.Fpga import getPrefdl
import FpgaUtil
import sys
import Tac
import optparse # pylint: disable=deprecated-module
import re
import time
import Tracing
import json
# pylint: disable-next=consider-using-from-import

# Trace handle for this script, created under the name "FpgaAgent".
# NOTE(review): presumably Tracing looks this module-level name up to route
# the trace0 calls made below - confirm before renaming or reordering.
__defaultTraceHandle__ = Tracing.Handle( "FpgaAgent" )
# Shorthand for Tracing.trace0; printHelper() uses it when --verbose is off.
t0 = Tracing.trace0

# Path of the FastCli program that is run to issue CLI commands.
fastCli = '/usr/bin/FastCli'

# Here, we list all the Agents that use the SCD for whatever reason.
# Before reset, we need to shutdown those - among this list - that are running
# and restart after the reset.
# We need to group them by their interdependency, to avoid race conditions
# and agents falling back to default values for certain fields. For example,
# if the Scd agent is started together with XcvrAgent, the race between them
# may make XcvrAgent default the transceiver type, which can cause Strata to
# think that the port has changed its xcvrMediaType and flap it.
# We use the grouping from the experience on Doran testing, and then used
# StageGraph for Asu Hitless as hint to decide grouping.
# All agents in one group can be started together, as they do not race
# with each other, while agents in a later group depend - roughly - on the
# agents of the previous group having started and initialized.
# The second element of each group is the number of seconds to pause before
# starting the next group of agents. Note that all agents can be stopped
# together, regardless of group.
# (Asu Hitless graph can be found in
#  /src/StageMgr/StageGraph/AsuHitlessBootStrata.py)

# Agents that are under the "PrimaryAgent" stage.
_primaryStageAgents = [
   "PlutoSmbus",
   "PLSmbusMediator",
   "PLSystem",
   "Smbus",
   "Smbus-fixed-Switch",
   "Smbus-fixed-Cpu",
   "PLDevice",
   "PLDevice-FanTray",
   "PLDevice-Led",
   "PLDevice-TempDevice",
   "PLDevice-PoeDevice",
   "PLSlot",
   "Scd",
   "PoeTec",
   "FanDetector",
]

# Agents that come after the above mentioned stage.
_laterStageAgents = [
   "Max6658",
   "Pmbus",
   "Ds125Br-FixedSystem",
   "PowerSupplyDetector",
   "StandbyCpld",
   "Ucd9012",
   "Max6581",
   "Strata",
   "Xcvr",
   "XcvrAgent",
   "PLXcvr",
   "Idt82p33831",
   "PtpTimeSync",
   "Sol",
   "PhyIsland-FixedSystem",
   "Babbage-FixedSystem",
   "PowerSupplyFixed",
   "PowerManager",
]

# Each entry is [ agentNames, secondsToPauseAfterStartingThisGroup ], listed
# in the order in which the groups are started.
groupOfAgentsUsingScd = [
   [ _primaryStageAgents, 2 ],
   [ _laterStageAgents, 0 ],
]

# Usage text handed to the option parser that is built right after this.
usage = """
   hitlessResetFpga [OPTIONS] [fpgaName]

   fpgaName:    Name of the FPGA to reset. (All System FPGAs if not specified)
"""

# Command line: three boolean flags, optionally followed by an FPGA name.
# Each row is ( short option, long option, help text ).
parser = optparse.OptionParser( usage=usage )
for shortOpt, longOpt, helpText in (
      ( "-r", "--restartAgents",
        "stop agents/processes before hitless reset and "
        "then start after it" ),
      ( "-v", "--verbose",
        "enable verbose output" ),
      ( "-p", "--poeMaintain",
        "Attempt poeMaintain reset if hitless reset "
        "is not available on the fpga" ),
   ):
   parser.add_option( shortOpt, longOpt, action="store_true", help=helpText )
options, args = parser.parse_args()

# List of the system FPGA objects built from the prefdl.
prefdl = getPrefdl()
systemFpgas = FpgaUtil.systemFpgas( prefdl )
fpgas = systemFpgas.fpgaList()

# The first positional argument, when given, names the single FPGA to reset;
# None means no name was given.
fpgaName = args[ 0 ] if args else None

def printHelper( message ):
   """Report message: print it when --verbose was given, otherwise trace it
   through t0."""
   emit = print if options.verbose else t0
   emit( message )

def hitlessResetFpga( fpga ):
   """Reset one FPGA and report the outcome through printHelper.

   A hitless reset is used when the FPGA supports it. Otherwise a poeMaintain
   reset is used when the FPGA supports that and --poeMaintain was given.
   When neither applies, only a "not supported" message is reported.

   Returns 1 when an attempted reset reports failure, 0 in every other case
   (including when no reset was attempted).
   """
   if fpga.isHitlessResetSupported():
      succeeded = fpga.hitlessReset( useSleep=True )
      outcome = "succeeded" if succeeded else "failed"
      printHelper( "hitless-reset for %s fpga %s" % ( fpga.name(), outcome ) )
      return 0 if succeeded else 1

   if fpga.isPoeMaintainResetSupported() and options.poeMaintain:
      printHelper( "performing poeMaintainReset for %s fpga " % ( fpga.name() ) )
      succeeded = fpga.poeMaintainReset( useSleep=True )
      outcome = "succeeded" if succeeded else "failed"
      printHelper( "poeMaintainReset for %s fpga %s" % ( fpga.name(), outcome ) )
      return 0 if succeeded else 1

   # Nothing we are allowed to do for this FPGA; that is not an error.
   printHelper( "hitless-reset for %s fpga not supported" % ( fpga.name() ) )
   return 0

def stopAgents( groupOfAgentsToStop ):
   """Shut down every agent of every group with one FastCli invocation.

   groupOfAgentsToStop is a list of [ agentNames, sleepTime ] groups; the
   sleepTime element is ignored here. The captured command output is
   reported through printHelper.
   """
   cmdLines = [ "configure" ]
   cmdLines.extend( "agent " + agentName + " shutdown"
                    for agentNames, _ in groupOfAgentsToStop
                    for agentName in agentNames )
   # Every command, including the last one, is terminated by a newline.
   agentStopCmds = "\n".join( cmdLines ) + "\n"
   output = Tac.run( [ sys.executable, fastCli, "-p", "2", "-c", agentStopCmds ],
                     stdout=Tac.CAPTURE )
   printHelper( output )

def startAgents( groupOfAgentsToStart ):
   """Re-enable the agents group by group, in the order given.

   groupOfAgentsToStart is a list of [ agentNames, sleepTime ] groups. Each
   non-empty group is started with its own FastCli invocation, whose captured
   output is reported through printHelper; afterwards the function sleeps for
   that group's sleepTime seconds when it is positive. Empty groups are
   skipped entirely, including their sleep.
   """
   for agentNames, sleepTime in groupOfAgentsToStart:
      if not agentNames:
         continue
      cmdLines = [ "configure" ]
      cmdLines.extend( "no agent " + agentName + " shutdown"
                       for agentName in agentNames )
      agentStartCmds = "\n".join( cmdLines ) + "\n"
      output = Tac.run(
         [ sys.executable, fastCli, "-p", "2", "-c", agentStartCmds ],
         stdout=Tac.CAPTURE )
      printHelper( output )
      if sleepTime > 0:
         # Give this group time to come up before the next one is started.
         time.sleep( sleepTime )


def getRunningAgentsUsingScd( groupOfAgents ):
   """Ask the CLI which of the given agents are currently present.

   groupOfAgents is a list of [ agentNames, sleepTime ] groups. Each agent is
   queried with 'show agent <name> uptime | json'. It counts as running when
   the reply does not contain "Invalid input" and the queried name appears
   under "agents" in the JSON reply.

   Returns a 3-tuple:
     - a list of [ runningAgentNames, sleepTime ] groups, one per input group
       and in the same order,
     - a dict mapping agent name to the reply's "restartCount",
     - a dict mapping agent name to the reply's "agentStartTime".
   """
   liveGroups = []
   restartCounts = {}
   startTimes = {}
   for agentNames, sleepTime in groupOfAgents:
      liveNames = []
      for listedName in agentNames:
         # "Strata" is queried here as "Strata-FixedSystem", but recorded in
         # the results (and hence later shut down / restarted) as "Strata".
         # NOTE(review): the comment this replaces described the mapping the
         # other way round ('show agent' wants 'Strata', 'agent ... shutdown'
         # wants 'Strata-FixedSystem') - confirm which name each command
         # really accepts.
         cliName = "Strata-FixedSystem" if listedName == "Strata" else listedName
         reply = Tac.run(
            [ sys.executable, fastCli, "-p", "2", "-c",
              "show agent " + cliName + " uptime | json\n" ],
            stdout=Tac.CAPTURE, asRoot=True, ignoreReturnCode=True )
         if re.search( "Invalid input", reply ):
            # Command was rejected; treat the agent as not running.
            continue
         agents = json.loads( reply )[ "agents" ]
         if cliName not in agents:
            continue
         reportedName = "Strata" if cliName == "Strata-FixedSystem" else cliName
         restartCounts[ reportedName ] = agents[ cliName ][ "restartCount" ]
         startTimes[ reportedName ] = agents[ cliName ][ "agentStartTime" ]
         liveNames.append( reportedName )
      liveGroups.append( [ liveNames, sleepTime ] )

   return liveGroups, restartCounts, startTimes

def printRunningAgentInfo( groupOfAgents, restartCount, uptime ):
   """Report one line per agent with its restart count and uptime entry.

   groupOfAgents is a list of [ agentNames, sleepTime ] groups; restartCount
   and uptime are dicts keyed by agent name and must hold every listed agent.
   """
   everyAgent = ( agentName
                  for agentNames, _ in groupOfAgents
                  for agentName in agentNames )
   for agentName in everyAgent:
      details = ( agentName, restartCount[ agentName ], uptime[ agentName ] )
      printHelper( "%s: restart: %s, uptime: %s" % details )

# Decide up front whether any FPGA we were asked to reset supports a reset we
# are allowed to perform; if none does, exit before any agent is touched.
#
# "is None" (rather than truthiness) keeps an explicitly given but empty name
# on the by-name path, in line with the "if not args" test that selects the
# FPGAs when the reset itself is performed. Otherwise an empty name would be
# checked against all FPGAs here and then match none of them at reset time.
if fpgaName is None:
   candidateFpgas = fpgas
else:
   candidateFpgas = [ f for f in fpgas if f.name() == fpgaName ]
   if not candidateFpgas:
      printHelper( "%s fpga is not present" % ( fpgaName ) )

anyFpgaWithSupport = False
for f in candidateFpgas:
   printHelper( "checking if hitless reset is supported by %s" % f.name() )
   anyFpgaWithSupport = anyFpgaWithSupport or f.isHitlessResetSupported()
   if options.poeMaintain:
      # With --poeMaintain, a poeMaintain-capable FPGA is also worth going on.
      anyFpgaWithSupport = ( anyFpgaWithSupport or
                             f.isPoeMaintainResetSupported() )

if not anyFpgaWithSupport:
   printHelper( "None of the fpgas support hitless reset, exiting" )
   # sys.exit rather than the exit() builtin, which is a helper installed by
   # the site module and not guaranteed to exist.
   sys.exit( 0 )

printHelper( "The fpgas support hitless reset, continuing" )

# Defaults used when --restartAgents is not given: no agent groups at all.
# (A list of [ agentNames, sleepTime ] groups, so the empty value is [].)
groupOfRunningAgents = []
runningRestartCount = {}
runningUptime = {}

def countAgentsInGroups( groupOfAgents ):
   """Return the total number of agents over all groups.

   groupOfAgents is a list of groups whose first element is the list of agent
   names; any further elements of a group are ignored.
   """
   return sum( len( group[ 0 ] ) for group in groupOfAgents )

numAgentsMasterList = countAgentsInGroups( groupOfAgentsUsingScd )

# Overall exit status, plus the polling budget used while waiting for agents
# to stop or to start: up to waitCount polls, sleepPerWait seconds apart.
hrc = 0
waitCount = 15
sleepPerWait = 2

if options.restartAgents:
   # Snapshot what is running right now; only these agents are stopped here
   # and restarted after the reset.
   runningSnapshot = getRunningAgentsUsingScd( groupOfAgentsUsingScd )
   groupOfRunningAgents, runningRestartCount, runningUptime = runningSnapshot
   printHelper( "Master list of agents has %s agents" % numAgentsMasterList )
   printHelper( "They are: %s" % groupOfAgentsUsingScd )
   numRunning = countAgentsInGroups( groupOfRunningAgents )
   printHelper( "%d of these agents are running and will shut down" % numRunning )
   printRunningAgentInfo( groupOfRunningAgents, runningRestartCount, runningUptime )

   printHelper( "Will stop agents now." )
   stopAgents( groupOfRunningAgents )

   # Poll until none of the stopped agents is reported any more, or until the
   # polling budget is used up.
   for _poll in range( waitCount ):
      time.sleep( sleepPerWait )
      ( groupOfNotStoppedAgents,
        notStoppedAgentsRestartCount,
        notStoppedAgentsUptime ) = getRunningAgentsUsingScd( groupOfRunningAgents )
      numNotStopped = countAgentsInGroups( groupOfNotStoppedAgents )
      if not numNotStopped:
         break

   if numNotStopped > 0:
      printHelper( "Following %s agents not stopped!!" % numNotStopped )
      printRunningAgentInfo( groupOfNotStoppedAgents,
                             notStoppedAgentsRestartCount,
                             notStoppedAgentsUptime )
   printHelper( "Seeing %s agents (0 expected)" % numNotStopped )

# Perform the reset(s). hrc ends up non-zero when any attempted reset failed.
if not args:
   printHelper( "all of System FPGAs will be reset one-by-one" )
   for f in fpgas:
      printHelper( "calling to reset %s" % f.name() )
      # Call the reset first and only then fold in the earlier status:
      # "hrc or hitlessResetFpga( f )" short-circuits, so after the first
      # failure every remaining FPGA was announced but never actually reset.
      hrc = hitlessResetFpga( f ) or hrc
else:
   foundByName = False
   for f in fpgas:
      if f.name() == fpgaName:
         foundByName = True
         printHelper( "calling to reset given fpga: %s" % f.name() )
         # Same accumulation as above, so a later match cannot hide a failure.
         hrc = hitlessResetFpga( f ) or hrc
   if not foundByName:
      printHelper( "%s fpga is not present" % ( fpgaName ) )

if options.restartAgents:
   printHelper( "Will restart agents that were stopped." )
   startAgents( groupOfRunningAgents )

   # Poll until as many agents are reported as were running before the reset,
   # or until the polling budget is used up.
   numBeforeReset = countAgentsInGroups( groupOfRunningAgents )
   for _poll in range( waitCount ):
      time.sleep( sleepPerWait )
      ( groupOfStartedAgents,
        startedAgentsRestartCount,
        startedAgentsUptime ) = getRunningAgentsUsingScd( groupOfRunningAgents )
      numStarted = countAgentsInGroups( groupOfStartedAgents )
      if numStarted == numBeforeReset:
         break

   printHelper( "Following %s agents are started!!" % numStarted )
   printRunningAgentInfo( groupOfStartedAgents, startedAgentsRestartCount,
                          startedAgentsUptime )
   printHelper( "%s agents were running before reset" % numBeforeReset )
   printHelper( "After reset, %s agents are running" % numStarted )

# The exit status reflects the FPGA reset result only.
sys.exit( hrc )
