#!/usr/bin/env python3
# Copyright (c) 2023 Arista Networks, Inc.  All rights reserved.
# Arista Networks, Inc. Confidential and Proprietary.

import Agent
import Cell
import Logging
import Tac
import Tracing
import weakref

# Short aliases for the Tracing module's trace0..trace3 functions; the rest of
# this file emits all of its trace output through them.
t0 = Tracing.trace0
t1 = Tracing.trace1
t2 = Tracing.trace2
t3 = Tracing.trace3

# Module-level handle on the running agent instance.  It stays None until
# SystemInitMonitor.__init__ assigns itself here.
agent = None

# Notice-level log handle; logged by SystemInitSm.handleDelayedComplete just
# before the system is marked initialized.
SYS_SYSTEM_INITIALIZED = Logging.LogHandle(
   "SYS_SYSTEM_INITIALIZED",
   severity=Logging.logNotice,
   fmt="System is initialized",
   explanation="The initial set of running agents have initialized and the system "
   "is ready",
   recommendedAction=Logging.NO_ACTION_REQUIRED )

# Log handle used by SystemInitSm.handleTimeout.  The single %s in fmt is
# filled with a space-separated list of up to three agents that are still
# pending when the timeout fires.
SYS_SYSTEM_INITIALIZATION_TIMEOUT = Logging.LogHandle(
   "SYS_SYSTEM_INITIALIZATION_TIMEOUT",
   Logging.logWarning,
   fmt="System is declared initialized (some agents such as %s are not ready)",
   explanation="The system has taken enough time to initialize even though "
   "some agents have not finished initialization. Some features might be affected.",
   recommendedAction=Logging.CALL_SUPPORT_IF_PERSISTS )

# Upper bound, in seconds, on how long we wait for agents before declaring the
# system initialized anyway.  41 minutes gives agents that have their own
# 40 minute timeout one minute to time out before we time out here.
# Reduce this to 31 minutes once BUG844493 'Add new interlock in SandPhy
# to wait for fabric readiness before enabling front panel ports' is resolved.
SYSTEM_INIT_TIMEOUT = 41 * 60

# Quiet period, in seconds (one minute): after all agents are ready we keep
# waiting until no more agents have been launched for this long.
AGENT_LAUNCH_DELAY = 60

class AgentStatusReadySm( Tac.Notifiee ):
   """Reactor on one Agent::Status entity that reports readiness to its parent.

   The parent must provide handleAgentReady( name ); it is held through a
   weakref proxy so this reactor does not keep it alive.
   """
   notifierTypeName = "Agent::Status"

   def __init__( self, notifier, parent ):
      super().__init__( notifier )
      self.parent = weakref.proxy( parent )
      # The agent may already be ready when we start watching it, so evaluate
      # the current state once right away.
      self.handleReady()

   @Tac.handler( 'ready' )
   def handleReady( self ):
      """Tell the parent about this agent once its status reports ready."""
      status = self.notifier_
      if not status.ready:
         return
      self.parent.handleAgentReady( status.name )

class LaunchedAgentInfoSm( Tac.Notifiee ):
   """Reactor on one Tac::LaunchedAgentInfo entry.

   Every change to the entry's configName, and the closing of this reactor,
   makes the parent re-examine the agent through
   parent.handleLaunchedAgent( agentName ).  The parent is held through a
   weakref proxy.
   """
   notifierTypeName = "Tac::LaunchedAgentInfo"

   def __init__( self, notifier, parent ):
      super().__init__( notifier )
      # Nothing to report yet: initially we don't have any configName.
      self.parent = weakref.proxy( parent )

   @Tac.handler( 'configName' )
   def handleLaunchedAgent( self, name ):
      """React to a configName change.

      `name` is the argument supplied by the handler and is not used; the
      parent is always given the launched agent's own name so it can decide
      again whether the agent is of interest.
      """
      agentName = self.notifier_.name
      self.parent.handleLaunchedAgent( agentName )

   def close( self ):
      """Let the parent re-evaluate the agent one last time, then close."""
      agentName = self.notifier_.name
      self.parent.handleLaunchedAgent( agentName )
      super().close()

class SystemInitSm:
   """Decide when the system as a whole counts as initialized.

   A set of pending agents is maintained: launched processes that are real
   agents and have not yet reported ready, plus the caller-supplied initial
   agents.  Once that set is empty and no agent has been launched for
   AGENT_LAUNCH_DELAY seconds, SYS_SYSTEM_INITIALIZED is logged and
   systemStatus.initialized is set.  If SYSTEM_INIT_TIMEOUT seconds elapse
   first, the system is declared initialized anyway.
   """

   def __init__( self, launchedAgents,
                 launcherAgentConfigDir,
                 agentStatusDir,
                 systemStatus,
                 initialAgents ):
      """Start tracking agent readiness.

      launchedAgents: iterable of launched agent names that also exposes the
         underlying collection as `.entityPtr`.
      launcherAgentConfigDir: object whose `.agent` collection maps a config
         name to a launcher agent config.
      agentStatusDir: object whose `.entityPtr` is the collection of agent
         status entities to watch for readiness.
      systemStatus: object whose `initialized` attribute is set when done.
      initialAgents: iterable of agent names that are pending from the start
         even if they have not been launched yet.
      """
      self.launchedAgents = launchedAgents
      self.launcherAgentConfigDir = launcherAgentConfigDir
      self.agentStatusDir = agentStatusDir
      self.systemStatus = systemStatus
      self.pendingAgents = { name for name in launchedAgents
                             if self.isRealAgent( name ) }
      self.pendingAgents |= set( initialAgents )
      t0( f"pending agents: {sorted(self.pendingAgents)}" )
      self.readyAgents = set()
      # declares the system initialized after a timeout, ready or not
      self.timeoutClock = Tac.ClockNotifiee( handler=self.handleTimeout,
                                             timeMin=Tac.now() +
                                             SYSTEM_INIT_TIMEOUT )
      # when all agents are ready, wait for some time until no more agents
      # are launched.
      self.delayClock = Tac.ClockNotifiee( handler=self.handleDelayedComplete,
                                           timeMin=Tac.endOfTime )

      # when the agent is added to the pending list, record the agent's start
      # waiting timestamp for timeout debugging purposes.
      startTime = Tac.now()
      self.pendingAgentsStartTime = dict.fromkeys( self.pendingAgents,
                                                   startTime )

      # Keep this flag clear while the reactors below are created: in case all
      # agents are immediately ready, handleComplete() must not invoke cleanup
      # before things are fully initialized.
      self.initialized = False
      self.lastLaunchedAgentTime = Tac.now()
      self.launchedAgentSm = Tac.collectionChangeReactor(
         launchedAgents.entityPtr,
         LaunchedAgentInfoSm,
         reactorArgs=( self, ) )
      self.agentStatusDirSm = Tac.collectionChangeReactor(
         agentStatusDir.entityPtr,
         AgentStatusReadySm,
         reactorArgs=( self, ) )
      # it's possible that we are done now, if so, clean up
      self.initialized = True
      if not self.pendingAgents:
         self.handleComplete()

   def getLauncherAgentConfig( self, name ):
      """Return the first launcher agent config found for launched agent `name`.

      Returns None when `name` is not in the launched agents collection or
      when none of its config names has an entry in the launcher config dir.
      """
      launchedInfo = self.launchedAgents.entityPtr.get( name )
      if not launchedInfo:
         # The entry is missing (e.g. already unlaunched); nothing to look up.
         return None
      for configName in launchedInfo.configName:
         # find the first agentConfig
         agentConfig = self.launcherAgentConfigDir.agent.get( configName )
         if agentConfig:
            return agentConfig
      return None

   def isRealAgent( self, name ):
      """Return True if `name` has a launcher config that marks it as an agent
      and not as a user daemon; False otherwise (including no config found)."""
      agentConfig = self.getLauncherAgentConfig( name )
      if not agentConfig:
         t1( f"Cannot find launcher config for {name}" )
         return False
      return bool( agentConfig.agent and not agentConfig.userDaemon )

   def addPending( self, name ):
      """Mark `name` as pending and restart the quiet period."""
      self.pendingAgents.add( name )
      # cancel delay timer
      self.lastLaunchedAgentTime = Tac.now()
      self.delayClock.timeMin = Tac.endOfTime
      self.pendingAgentsStartTime[ name ] = Tac.now()

   def removePending( self, name ):
      """Drop `name` from the pending set; `name` must currently be pending.

      Starts completion handling when this was the last pending agent.
      """
      self.pendingAgents.remove( name )
      self.pendingAgentsStartTime.pop( name, None )
      if not self.pendingAgents:
         self.handleComplete()
      elif len( self.pendingAgents ) <= 5:
         # log the remaining few agents for debugging
         t2( f"remaining agents: {sorted(self.pendingAgents)}" )

   def handleLaunchedAgent( self, name ):
      """Re-evaluate whether launched (or unlaunched) process `name` should be
      waited for.  Does nothing once the system is already initialized."""
      if self.systemStatus.initialized:
         # already done
         return
      if name in self.launchedAgents.entityPtr:
         if name in self.readyAgents:
            t1( f"agent {name} launched and already ready" )
         elif self.isRealAgent( name ):
            t2( f"agent {name} launched and not yet ready" )
            self.addPending( name )
         elif name in self.pendingAgents:
            t2( f"process {name} is not a real agent and removed from pending" )
            self.removePending( name )
         else:
            t3( f"process {name} is not a real agent" )
      else:
         if name in self.pendingAgents:
            t3( f"agent {name} unlaunched and pending" )
            self.removePending( name )
         else:
            t3( f"agent {name} unlaunched and not pending" )

   def handleAgentReady( self, name ):
      """Record that agent `name` is ready and stop waiting for it."""
      self.readyAgents.add( name )
      if name in self.pendingAgents:
         t1( f"launched agent {name} is ready "
             f"({len( self.pendingAgents ) - 1} left)" )
         self.removePending( name )
      else:
         t3( f"unlaunched agent {name} is ready" )

   def handleComplete( self ):
      """Called when no agent is pending: finish now, or arm the delay clock
      so we finish once AGENT_LAUNCH_DELAY has passed since the last launch."""
      if not self.initialized:
         # still inside __init__; it re-checks once construction is complete
         return

      # Do we need to wait for more agents to launch?
      expireTime = self.lastLaunchedAgentTime + AGENT_LAUNCH_DELAY
      if expireTime < Tac.now():
         self.handleDelayedComplete()
      else:
         t1( "wait for more agents to be launched" )
         self.delayClock.timeMin = expireTime

   def handleDisplayPendingAgents( self ):
      """Trace how long each pending agent has been waiting."""
      if not self.pendingAgents:
         t0( "there are no pending agents" )
         return
      now = Tac.now()
      # sorted so the trace output is stable from run to run
      for name in sorted( self.pendingAgents ):
         howLong = now - self.pendingAgentsStartTime[ name ]
         t3( f"agent: {name} has been in the pending list for {howLong:.5f}s" )

   def handleTimeout( self ):
      """Timeout clock handler: declare the system initialized regardless."""
      if not self.pendingAgents:
         # Every agent is ready and only the quiet period was still running.
         # Finish normally rather than logging a timeout warning that would
         # name no agent at all.
         self.handleDelayedComplete()
         return
      t0( "timeout waiting for agents to be ready" )
      # print up to 3 agents
      agents = ' '.join( sorted( self.pendingAgents )[ : 3 ] )
      Logging.log( SYS_SYSTEM_INITIALIZATION_TIMEOUT, agents )
      self.handleDisplayPendingAgents()
      self.finalize()

   def handleDelayedComplete( self ):
      """All agents are ready and the quiet period is over: log and finish."""
      t0( "all agents are ready" )
      Logging.log( SYS_SYSTEM_INITIALIZED )
      self.finalize()

   def finalize( self ):
      """Mark the system initialized and drop the reactors, clocks and
      per-agent timestamps."""
      # Set the flag before dropping the reactors: handleLaunchedAgent()
      # returns early once it is set, so any notification that arrives while
      # the reactors go away (presumably via LaunchedAgentInfoSm.close) is
      # ignored.
      self.systemStatus.initialized = True
      self.launchedAgentSm = None
      self.agentStatusDirSm = None
      self.timeoutClock = None
      self.delayClock = None
      self.pendingAgentsStartTime = {}

class SystemInitMonitor( Agent.Agent ):
   """Agent that mounts the state SystemInitSm needs and then creates it."""

   def __init__( self, entityManager ):
      global agent
      super().__init__( entityManager )
      # Created in doInit once the mounts have completed.
      self.systemInitSm = None
      # Publish this instance through the module-level `agent` variable.
      agent = self

   def doInit( self, entityManager ):
      """Mount the agent status, launched agent, launcher config and system
      status paths; build the SystemInitSm when the mount group closes."""
      group = entityManager.mountGroup()
      agentStatusDir = group.mountPath( Cell.path( "agent/status" ) )
      launchedAgents = group.mountPath( Cell.path( "launcher/LaunchedAgents" ) )
      launcherAgentConfigDir = group.mountPath( "sys/config/agentConfigDir" )
      systemStatus = group.mountPath( "sys/status/system" )

      def _startSystemInitSm():
         # 'Fru' is always waited for, whether or not it has been launched yet.
         self.systemInitSm = SystemInitSm(
            launchedAgents,
            launcherAgentConfigDir,
            agentStatusDir,
            systemStatus,
            { 'Fru' } )

      group.close( callback=_startSystemInitSm )

def main():
   """Run the SystemInitMonitor agent inside an Agent.AgentContainer."""
   Agent.AgentContainer( [ SystemInitMonitor ] ).runAgents()
