#!/usr/bin/env python3
# Copyright (c) 2022 Arista Networks, Inc.  All rights reserved.
# Arista Networks, Inc. Confidential and Proprietary.

import Cell
import DesiredTracing
import Tac
import Tracing
from TypeFuture import TacLazyType

# We follow the tracing standard defined in /src/PhyTrace/README.md
# Trace handle for this module; all trace output below is emitted under the
# 'HwL1TopologySm' name.
__defaultTraceHandle__ = Tracing.Handle( 'HwL1TopologySm' )
# Request tracing for the handle created above.
# NOTE(review): the '/13' suffix presumably selects trace levels 1 and 3 - confirm
# against the tracing standard before changing it.
DesiredTracing.desiredTracingIs( 'HwL1TopologySm/13' )

# Short aliases for the Tracing functions used throughout this file.
t1 = Tracing.trace1
t3 = Tracing.trace3
t7 = Tracing.trace7

# The 'Redundancy::RedundancyMode' Tac type; its `active` and `standby` values are
# compared against RedundancyStatus.mode further down.
RedundancyMode = TacLazyType( 'Redundancy::RedundancyMode' )

class RedundancyStatusReactor( Tac.Notifiee ):
   '''
   This Sm is responsible for listening to the RedundancyStatus of the system, and
   calling into the parent sm whenever its mode changes so that the parent can
   re-evaluate whether the L1Topology TopoSm should be running.

   Args:
      starter: the parent sm; only its manageTopoSm() method is used here.
      redundancyStatus: the entity to watch; it is passed to the Tac.Notifiee base.
   '''

   # Tac type name of the entity this reactor is attached to.
   notifierTypeName = 'Redundancy::RedundancyStatus'

   def __init__( self, starter, redundancyStatus ):
      super().__init__( redundancyStatus )
      # Kept so that the handler below can call back into the parent sm.
      self.starter = starter

   # Registered against the 'mode' attribute of the watched entity.
   @Tac.handler( 'mode' )
   def handleRedundancyMode( self ):
      '''Forward a redundancy mode change to the parent sm.'''
      t7( 'Handling redundancy mode' )
      # Call into the starter whenever we see the redundancy status's mode change
      self.starter.manageTopoSm()

class L1PolicyDirReactor( Tac.Notifiee ):
   '''
   Watches the L1 Policy Dir of the system and notifies the parent sm whenever the
   l1InfraApi entity is created in, or destroyed from, that dir. The parent then
   re-evaluates whether the L1Topology TopoSm should be running.

   Args:
      starter: the parent sm; only its manageTopoSm() method is used here.
      policyDir: the dir to watch; it is passed to the Tac.Notifiee base.
   '''

   notifierTypeName = 'Tac::Dir'

   def __init__( self, starter, policyDir ):
      super().__init__( policyDir )
      self.starter = starter

   @Tac.handler( 'entityPtr' )
   def handleEntity( self, entityName ):
      '''Forward changes of the l1InfraApi entry to the parent sm.'''
      t7( 'Handling policy entity:', entityName )

      # Every other entry in the policy dir is irrelevant to the TopoSm.
      if entityName != "l1InfraApi":
         return

      self.starter.manageTopoSm()

class TopoSmStarter:
   '''
   Gatekeeper for the L1Topology TopoSm, the Sm we use to generate and resolve the
   topology. This class decides when the TopoSm is allowed to exist and creates or
   tears it down accordingly. Several cases feed into that decision.

   New L1 infra only: the TopoSm is only started if the new L1 infrastructure is
   actually running on the system. That does not necessarily mean that L1Topology
   is in use across the whole system, or even at all, but wherever L1Topology is in
   use we are by definition using the new infra.

   SSO: on the newly active supervisor the TopoSm is only instantiated after the
   entire switchover process is complete, so that we never attempt topology changes
   while agents are hitlessly initializing themselves. The various L1 agents resort
   to hitful operations (or crashing) when they find a mismatch between their
   currently programmed state and the topology/configuration in Sysdb, which can
   then impact/fail the switchover process as a whole.

   As an example, on Tundra the supervisors also contain a dataplane component to
   the card. When such a supervisor is removed, we simultaneously perform a
   switchover _and_ modify the topology. The old state of the topology has to be
   preserved until the switchover has resolved; only then is the card removal
   processed like any other.

   Exception to the SSO rule: the TopoSm is also started while the switchover is
   still in progress if the topology is marked as invalid due to some ongoing
   change/resolution. This makes sure the topology is resolved back to a sane state
   before the various agents start looking at it for their configuration.

   Args:
      redundancyStatus: entity whose `mode` drives the start/stop decision.
      policyDir: dir that holds the "l1InfraApi" entry when the new infra is used.
      generationReader: object whose topoGen().valid reports topology validity.
      *args: constructor args for the TopoSm, stored and handed over on start.
   '''

   def __init__( self, redundancyStatus, policyDir, generationReader, *args ):
      # Inputs to the start/stop decision.
      self.redundancyStatus = redundancyStatus
      self.policyDir = policyDir
      self.generationReader = generationReader
      # Held back until the TopoSm is actually started.
      self.topoSmArgs = tuple( args )

      # The TopoSm currently has to live in a singleton so that Fru can set up the
      # SSU boot stage handling properly. That remains true for as long as this
      # FruPlugin exists; moving the TopoSm into the L1PolicyAgent would lift the
      # restriction, see BUG740013 for more details.
      self.topoSmSingleton = \
            Tac.singleton( 'Hardware::L1Topology::HwL1TopologySmSingleton' )
      # Always begin from an empty singleton so that the mounted dirs stay in-sync
      # with the PluginContext.
      self.topoSmSingleton.hwL1TopologySm = None

      # The reactors only cover future changes; they will not fire for conditions
      # that already hold, hence the explicit evaluation right after creating them.
      self.reactors = [
         RedundancyStatusReactor( self, redundancyStatus ),
         L1PolicyDirReactor( self, policyDir ),
      ]
      self.manageTopoSm()

   def manageTopoSm( self ):
      '''Start or stop the TopoSm according to the current state of the inputs.'''
      # Systems without the new L1 infra never run the TopoSm.
      if "l1InfraApi" not in self.policyDir:
         t7( 'L1 Infra not present on the system, skipping starting L1 Topology Sm' )
         self.stopTopoSm()
         return

      mode = self.redundancyStatus.mode

      # Case 1: we are on the active supervisor, which is the one managing the
      # topology, so the TopoSm has to run.
      if mode == RedundancyMode.active:
         self.startTopoSm()
         return

      # Case 2: we are switching over from the standby and the topology is stuck in
      # an invalid state. Start the TopoSm "early" to sanitize the topology back to
      # a valid state, so that the forwarding agents can look at it during their
      # initialization. The validity is deliberately not queried while in standby.
      leftStandby = mode != RedundancyMode.standby
      if leftStandby and not self.generationReader.topoGen().valid:
         t1( 'Starting L1 Topology Sm during switchover to revalidate the topology' )
         self.startTopoSm()
         return

      # Anything else means the TopoSm must not run. A transition away from active
      # should never be seen, but it is handled here just in case.
      self.stopTopoSm()

   def startTopoSm( self ):
      '''Create the TopoSm in the singleton unless one is already there.'''
      singleton = self.topoSmSingleton
      if not singleton.hwL1TopologySm:
         t3( 'Starting the L1 Topology Sm' )
         # NOTE(review): assigning the saved args tuple is presumably how Tac
         # constructs the Sm in place - confirm against the singleton's definition.
         singleton.hwL1TopologySm = self.topoSmArgs
      else:
         t7( 'L1 Topology Sm is already running' )

   def stopTopoSm( self ):
      '''Clear the TopoSm from the singleton; does nothing when none is present.'''
      singleton = self.topoSmSingleton
      if singleton.hwL1TopologySm:
         t3( 'Stopping the L1 Topology Sm' )
         singleton.hwL1TopologySm = None

l1TopoSmStarter = None
def Plugin( context ):
   '''
   Mount the necessary Sysdb paths and, once the mounts have completed, load the
   L1 Topology plugins and create the TopoSmStarter that gates the topology sm.

   The created TopoSmStarter is stored in the module-level `l1TopoSmStarter`.
   '''
   mounts = context.entityManager.mountGroup()

   # Inputs of the TopoSmStarter itself
   redundancyStatus = mounts.mountPath( Cell.path( 'redundancy/status' ) )
   policyDir = mounts.mountPath( 'hardware/l1/policy' )

   # Inputs of the TopoSm
   hwChassisStatus = mounts.mountPath( 'hardware/chassis/status' )
   hwSliceDir = mounts.mountPath( 'hardware/slice' )
   asuStatus = mounts.mountPath( 'asu/hardware/status' )
   bootCompletionStatus = mounts.mountPath(
         Cell.path( 'stage/boot/completionstatus' ) )
   topoDir = mounts.mountPath( 'hardware/l1/topology' )
   mappingDir = mounts.mountPath( 'hardware/l1/mapping' )
   mappingConfigAgentDir = mounts.mountPath( 'hardware/l1/config/mapping/agent' )
   fruModelDir = mounts.mountPath( 'hardware/l1/fru/topology' )
   capRestrictionsDir = mounts.mountPath( 'hardware/l1/fru/capabilities' )
   phyTopoDir = mounts.mountPath( 'hardware/phy/topology/allPhys' )
   polarityOverrideDir = mounts.mountPath( 'intfSlot/config/polarity/override' )

   def onMountsComplete():
      global l1TopoSmStarter

      # The output of the various L1Topology plugins defined in EOS is an input to
      # the TopoSm, so load them now that the prerequisite paths from Sysdb have
      # finished mounting.
      # Note: this is done regardless of standby/active, as it only populates the
      #       local context and won't affect anything outside of Fru.
      t3( 'Loading L1 Topology Plugins' )
      pluginContext = Tac.newInstance( 'Hardware::L1Topology::Plugin::Context',
                                       topoDir, mappingDir )
      pluginLoader = Tac.newInstance( 'Hardware::L1Topology::Plugin::Loader' )
      pluginLoader.loadPlugins( 'L1TopologyPlugin', pluginContext, '', '' )

      # Helper instances, created in the order in which they are handed over below.
      generationReader = Tac.newInstance( "Hardware::L1Topology::GenerationReader",
                                          topoDir )
      asuCompletionDetector = Tac.newInstance(
            "Stage::AsuCompletionDetector",
            asuStatus, bootCompletionStatus, redundancyStatus )
      fruReadyStatus = Tac.newInstance(
            'Hardware::L1Topology::FruReadyWatcherStatus' )

      # Constructor args of the TopoSm: the mounted dirs from above plus the plugin
      # context and the FruReady status instance.
      topoSmArgs = (
         hwChassisStatus,
         hwSliceDir,
         asuCompletionDetector,
         topoDir,
         mappingDir,
         mappingConfigAgentDir,
         pluginContext,
         fruReadyStatus,
         fruModelDir,
         capRestrictionsDir,
         phyTopoDir,
         polarityOverrideDir,
      )

      # The L1 Topology Sm must only run after certain criteria are met, so rather
      # than creating it directly we create an Sm that watches the required inputs
      # and gates the TopoSm on them.
      l1TopoSmStarter = TopoSmStarter(
         redundancyStatus, policyDir, generationReader, *topoSmArgs )

   mounts.close( onMountsComplete )
