# Copyright (c) 2017 Arista Networks, Inc.  All rights reserved.
# Arista Networks, Inc. Confidential and Proprietary.

import Cell
import Tac
import StageSysdbHelper
import Tracing
from StageGraphUtil import getPlatformType
from Toggles.StageMgrToggleLib import (
      toggleStrataPimSsoEnabled,
      toggleSmashSSOEnabled,
      toggleEvpnMulticastSSOEnabled,
      toggleTundraSupUplinkFastSwitchoverStageEnabled )

# Module-wide trace handle registered under the name 'StageGraphs'; t0 is a
# shorthand for the handle's trace0 function and is used by the code below.
traceHandle = Tracing.Handle( 'StageGraphs' )
t0 = traceHandle.trace0

# register the stage graph for SSO switchover
def registerSwitchover( entityManager ):
   """Register the 'switchover' stage graph through a StageHelper.

   Creates a StageSysdbHelper.StageHelper for the 'switchover' stage class,
   sets its fatal-error reset mode and reload-cause description, registers
   every stage and stage dependency listed below, and finally marks the helper
   as initialized.

   The resulting graph is not the same on every system: getPlatformType()
   selects between the "strata" registrations and the registrations used for
   every other platform, and the StageMgr toggles add or remove individual
   stages.

   Args:
      entityManager: passed unchanged to StageSysdbHelper.StageHelper.

   Returns:
      None.
   """
   # Tac constant holder and enum type used to configure the helper below.
   rcConstants = Tac.Value( "ReloadCause::ReloadCauseConstants" )
   fatalErrorResetMode = Tac.Type( "Stage::FatalError::ResetMode" )

   # All registrations in this function go through this one helper.
   sh = StageSysdbHelper.StageHelper( entityManager, 'switchover' )
   sh.registerStageClass()
   sh.resetModeIs( fatalErrorResetMode.resetAll )
   sh.reloadCauseDescIs( rcConstants.switchoverDesc )

   # Note: registerSwitchoverStage will complain if you depend on a
   # string-name of a stage before the corresponding stage was defined.
   # So the order matters.


   # This event starts the hardware switchover rolling.
   # These root stages are registered with an empty dependency list.
   sh.registerStageDependency( "PCIEAcquired", [] )
   sh.registerStageDependency( "SysdbWritable", [] )
   if toggleSmashSSOEnabled():
      sh.registerStageDependency( "SmashWritable", [] )

   # The platform type decides which agent is registered for DmaReady here and
   # selects the platform-specific sections further down.
   platform = getPlatformType()
   t0( "platform", platform )
   if platform == "strata":
      sh.registerStage( "StrataCentral", "DmaReady", [ "PCIEAcquired" ], timeout=5 )
   else:
      sh.registerStage( "ForwardingAgent", "DmaReady",
                                  [ "PCIEAcquired" ] )

   # Some more knowledge that RedSup has of basic stages that we will encode
   # with dummy events.
   # Critical events that are so time-critical that they can't afford to wait
   # until Sysdb is writable (we have to guarantee that any events in this
   # stage do not try to write to Sysdb entities unless they were already
   # writable before switchover)
   sh.registerStageDependency( "TimeCriticalServices",
                               [ "PCIEAcquired", "DmaReady" ] )
   sh.registerStage( "Stp", "TimeCriticalServices" )
   sh.registerStage( "StpTxRx", "TimeCriticalServices" )
   sh.registerStage( "LacpTx", "TimeCriticalServices" )
   # StrataCentral joins TimeCriticalServices only when the toggle is on and the
   # platform is strata.
   if toggleTundraSupUplinkFastSwitchoverStageEnabled():
      if platform == "strata":
         sh.registerStage( "StrataCentral", "TimeCriticalServices" )

   # In Sysdb, the SysdbWritable event converts Sysdb to local, writable
   # It unmounts the old active, and it runs the handleMaster callbacks.
   # It depends on TimeCriticalServices only in order that it doesn't interfere
   # with it.
   sh.registerStage( "Sysdb", "SysdbWritable",
                     [ "TimeCriticalServices" ], timeout=2 )

   if toggleSmashSSOEnabled():
      # The switchover graph is local to the supervisor so it's fine to have an
      # agent name that is specific to that supervisor.
      sh.registerStage( f"Espresso-{Cell.cellId()}", "SmashWritable",
                     [ "TimeCriticalServices" ], timeout=2 )

   # The same dependency list is used for CriticalServices and, below, for
   # MacsecInitialized.
   criticalServicesDep = [ "SysdbWritable", "PCIEAcquired", "DmaReady" ]

   sh.registerStageDependency( "CriticalServices", criticalServicesDep )

   # MacsecInitialized stage starts parallel to CriticalServices. Not making it part
   # of critical services as it shouldn't block the CriticalServices stage.
   sh.registerStage( "Macsec", "MacsecInitialized",
                     criticalServicesDep,
                     timeout=60, completeNotRunnable=True )

   # Don't need to register any dependencies. (Already registered by RedSup
   # above.)
   # Lag and LACP seem to take about 10 seconds.  We can reduce this once
   # we improve the performances of these two events.
   sh.registerStage( "Lag", "CriticalServices", timeout=10 )
   sh.registerStage( "Stp", "CriticalServices" )
   sh.registerStage( "StpTxRx", "CriticalServices" )
   sh.registerStage( "VxlanSwFwd", "CriticalServices", completeNotRunnable=True )
   #  Sflow too latches onto <CriticalServices> to get notified,
   #  but itself doesn't constitute a stage.

   # A real event.  RedSup needs to mark us as switchover ready.
   # (I think at today's meeting we decided that this will only happen at the
   # very end).
   # But we don't need to wait for non-critical services, in any case.
   sh.registerStage( "ElectionMgr", "SwitchoverReady", [ "CriticalServices" ] )
   sh.registerStage( "ElectionMgr", "PeerPoweredOn", [ "PCIEAcquired" ],
                     complete=True )

   sh.registerStage( "Ira", "VrfStatusUpdate", [ "CriticalServices" ],
                     timeout=30 )

   # ManagementActive depends on PhyEthtool publishing "ready"
   # status, so we need to start PhyEthtool early
   # If we timeout in bringing up all management interfaces
   # in PhyEthtool, we still don't want to abort entire switchover from happening.
   # Hence, we mark ManagementIntfBringUp as non-critical.
   sh.registerStage( "PhyEthtool", "ManagementIntfBringUp",
                     [ "CriticalServices" ], timeout=30, critical=False,
                     completeNotRunnable=True )
   # Note that ManagementActive actually uses
   # and reacts to phyEthtoolStatus::maIntfBringUpCompleteOnActive
   # as an interlock between PhyEthtool processing and itself. So
   # we shouldn't technically need stage graph dependency. However, without
   # this, ManagementActive agent may react to phyEthtoolStatus and attempt
   # to write to read-only mount paths as without fast bring up here, it is
   # possible that these paths are not yet converted to write-only during
   # the switchover
   sh.registerStage( "ManagementActive", "MgmtActiveBringUp",
                     [ "ManagementIntfBringUp" ], complete=True,
                     critical=False )

   # IpRib starts in this stage and does some of its VRF processing to generate
   # inputs needed by BGP
   sh.registerStage( "IpRib", "VrfRoutingInfoUpdate", [ "VrfStatusUpdate" ],
                     timeout=60, completeNotRunnable=True )

   # ribd needs to start early to support graceful restart during SSO. Similarly
   # in multi-agent mode, the protocol agents need to start
   # early as well. Calling this stage as 'RibInitialized' which depends on
   # 'VrfStatusUpdate' stage when all the vrf's are ready on the standby
   # supervisor.
   # NOTE(review): the registrations below list 'VrfRoutingInfoUpdate' as the
   # direct dependency of 'RibInitialized'; 'VrfStatusUpdate' is reached only
   # through 'VrfRoutingInfoUpdate'.
   sh.registerStage( "Tunnel", "RibInitialized", [ "VrfRoutingInfoUpdate" ],
                     timeout=60 )
   sh.registerStage( "ConnectedRoute", "ConnectedRouteRibInitialized",
                     [ "VrfRoutingInfoUpdate", "MgmtActiveBringUp" ],
                     timeout=60, completeNotRunnable=True )

   sh.registerStage( "RouteInput", "RibInitialized", [ "VrfRoutingInfoUpdate" ],
                     timeout=60, completeNotRunnable=True )
   sh.registerStage( "Rib", "RibInitialized", [ "VrfRoutingInfoUpdate" ],
                     timeout=60, completeNotRunnable=True )
   sh.registerStage( "Ospf", "RibInitialized", [ "VrfRoutingInfoUpdate" ],
                     timeout=60, completeNotRunnable=True )
   sh.registerStage( "Ospf3", "RibInitialized", [ "VrfRoutingInfoUpdate" ],
                     timeout=60, completeNotRunnable=True )
   sh.registerStage( "Isis", "RibInitialized", [ "VrfRoutingInfoUpdate" ],
                     timeout=60, completeNotRunnable=True )
   sh.registerStage( "Bgp", "RibInitialized", [ "VrfRoutingInfoUpdate" ],
                     timeout=60, completeNotRunnable=True )
   sh.registerStage( "GribiRoute", "RibInitialized", [ "VrfRoutingInfoUpdate" ],
                     timeout=60, completeNotRunnable=True )

   sh.registerStage( "SrTePolicy", "RibInitialized", [ "VrfRoutingInfoUpdate" ],
                     timeout=60, completeNotRunnable=True )

   # Mpls and label distribution protocol agents need to start early to support
   # NSF during SSO for mpls vpns. Convergence of these protocols are orchestrated
   # using stages as below
   sh.registerStage( "Mpls", "RibInitialized", [ "VrfRoutingInfoUpdate" ],
                     timeout=60, completeNotRunnable=True )

   # IpFibReady waits for both RIB-initialization stages registered above.
   sh.registerStage( "IpRib", "IpFibReady",
                     [ "RibInitialized", "ConnectedRouteRibInitialized" ],
                     timeout=720, completeNotRunnable=True )

   sh.registerStage( "LdpAgent", "LdpStart", [ "VrfRoutingInfoUpdate" ],
                     timeout=60, completeNotRunnable=True )
   sh.registerStage( "LdpAgent", "MplsLibReady", [ "LdpStart" ],
                     timeout=360, completeNotRunnable=True )
   sh.registerStage( "Isis", "MplsLibReady", [ "IpFibReady" ],
                     timeout=60, completeNotRunnable=True )

   # TunnelTablesReady does not depend on IpFibReady because LDP uses the resolverId
   # (Sysdb) flavor of NexthopResolver for FIB resolution. LDP is the only tunnel
   # source within Mpls that participates in SSO.
   sh.registerStage( "Mpls", "TunnelTablesReady", [ "MplsLibReady" ],
                     timeout=60, completeNotRunnable=True )
   sh.registerStage( "GribiRoute", "TunnelTablesReady", [ "IpFibReady" ],
                     timeout=60, completeNotRunnable=True )

   sh.registerStageDependency( "TunnelTablesProcessed", [ "TunnelTablesReady" ] )
   sh.registerStage( "Tunnel", "TunnelTablesProcessed", timeout=60 )

   sh.registerStage( "IpRib", "TunnelFibReady", [ "TunnelTablesProcessed" ],
                     timeout=60, completeNotRunnable=True )

   # Fru-Plugins is registered on every platform, but with a shorter dependency
   # list on strata; HwSyncWaitNormal is registered up front only on non-strata
   # platforms (strata registers it at the end of its own section below).
   if platform == "strata":
      sh.registerStage( "Fru", "Fru-Plugins",
                        [ "CriticalServices", "PCIEAcquired" ], timeout=60 )
   else:
      sh.registerStage( "ForwardingAgent", "HwSyncWaitNormal",
                        [ "CriticalServices" ], timeout=60 )

      # In Fru agent: load plugins and create drivers
      # Does SmBus & Modular system agent need Fru plugins to have been loaded,
      # and drivers created before it can launch all the hardware agents that
      # depend on it?  We certainly need RedSup's FruPlugin to run and clean out
      # all of cell/<other-supervisor-cellid>/
      sh.registerStage( "Fru", "Fru-Plugins",
                        [ "CriticalServices", "PCIEAcquired",
                          "HwSyncWaitNormal", "RibInitialized",
                          "ConnectedRouteRibInitialized" ], timeout=60 )
      # Fru cleanup can be a bit slow (especially if the system is otherwise
      # loaded), since Fru first has to mount all of its state from Sysdb, then
      # exec all of the fdls, then cleanup after the old supervisor. This is
      # fine since this is the part of switchover which is not super time
      # sensitive.

   # ModularSystem agent to run after Fru-Plugins
   # change event timeout to 15 secs
   sh.registerStage( "ModularSystem", "ModularSystem-switchover",
                     [ "Fru-Plugins" ], timeout=15 )

   sh.registerStageDependency( "PcieConfigurable", [ "ModularSystem-switchover" ] )

   sh.registerStage( "Olive", "OliveInitialized", [ "Fru-Plugins" ],
                     timeout=60, completeNotRunnable=True )

   # Strata-only part of the graph. Everything up to the next platform check is
   # registered only when the platform type is "strata".
   if platform == "strata":
      sh.registerStage( "StrataCentral", "ModuleConfigUpdate",
                        [ "Fru-Plugins", "PcieConfigurable" ], 
                        timeout=120 )
      sh.registerStageDependency( "ModuleStatusUpdate",
                                  [ "ModuleConfigUpdate" ] )

      sh.registerStage( "StrataL3", "StrataVxlanVirtualPortListUpdate",
                        [ "ModuleStatusUpdate" ], timeout=60 )

      sh.registerStage( "StrataL2", "StrataVxlanVirtualPortListReconcile",
                               [ "ModuleStatusUpdate",
                                 "StrataVxlanVirtualPortListUpdate" ] )

      sh.registerStageDependency( "L2MacTblShadowReconcile",
                                  [ "ModuleStatusUpdate",
                                    "StrataVxlanVirtualPortListReconcile" ] )
      sh.registerStage( "StrataL2", "L2MacTblShadowReconcile", timeout=120 )

      sh.registerStageDependency( "L2MacAddrSmUpdate",
                                  [ "L2MacTblShadowReconcile" ] )
      sh.registerStage( "StrataL2", "L2MacAddrSmUpdate", timeout=120 )

      sh.registerStageDependency( "L3FibReady",
                                  [ "StrataVxlanVirtualPortListUpdate",
                                    "RibInitialized",
                                    "ConnectedRouteRibInitialized" ] )
      sh.registerStage( "StrataL3", "L3FibReady", timeout=600 )

      sh.registerStageDependency( "L2RibUpdate", [ "L3FibReady",
                                                   "L2MacAddrSmUpdate" ] )
      sh.registerStage( "Bgp", "L2RibUpdate", timeout=120,
                        completeNotRunnable=True )
      sh.registerStage( "StrataL2", "L2RibUpdate", timeout=120 )

      # EVPN multicast stages; registered on strata only when the toggle is on.
      if toggleEvpnMulticastSSOEnabled():
         sh.registerStageDependency( "IgmpSnoopingReconcile", [ "L3FibReady" ] )
         sh.registerStage( "IgmpSnooping", "IgmpSnoopingReconcile", timeout=120 )

         sh.registerStageDependency( "IgmpSnoopingEvpnSmet", [ "L2RibUpdate" ] )
         sh.registerStage( "IgmpSnooping", "IgmpSnoopingEvpnSmet", timeout=120 )

         sh.registerStageDependency( "McastVpnEncapReconcile", [ "L3FibReady" ] )
         sh.registerStage( "McastVpn", "McastVpnEncapReconcile", timeout=120,
                           completeNotRunnable=True )

         sh.registerStageDependency( "EvpnMulticastReady",
                                     [ "IgmpSnoopingReconcile",
                                       "McastVpnEncapReconcile" ] )
         sh.registerStage( "Bgp", "EvpnMulticastReady", timeout=120,
                        completeNotRunnable=True )

         sh.registerStageDependency( "SpmsiUpdate", [ "L3FibReady" ] )
         sh.registerStage( "Bgp", "SpmsiUpdate", timeout=120,
                           completeNotRunnable=True )

         sh.registerStageDependency( "McastVpnDecapReconcile",
                                     [ "IgmpSnoopingReconcile",
                                       "SpmsiUpdate" ] )
         sh.registerStage( "McastVpn", "McastVpnDecapReconcile", timeout=120,
                           completeNotRunnable=True )


      sh.registerStageDependency( "L2RibReconcile", [ "L2RibUpdate" ] )
      sh.registerStage( "L2Rib", "L2RibReconcile", timeout=60 )

      sh.registerStageDependency( "L2RibReady", [ "L2RibReconcile" ] )
      sh.registerStage( "Bgp", "L2RibReady", timeout=120,
                        completeNotRunnable=True )
      sh.registerStage( "StrataL2", "L2RibReady", timeout=120 )

      sh.registerStage( "StrataL2", "L2MacAddrSmReconcile",
                        [ "ModuleStatusUpdate",
                          "StrataVxlanVirtualPortListReconcile",
                          "L2RibReady" ],
                        timeout=120 )

      sh.registerStage( "StrataL2", "L2McastSmReconcile",
                        [ "L2MacAddrSmReconcile" ], timeout=120 )

      sh.registerStageDependency( "L3UcastSmsReconcile",
                                  [ "L2MacAddrSmReconcile", "L3FibReady" ] )
      sh.registerStage( "StrataL3", "L3UcastSmsReconcile", timeout=60 )

      # Dependency list for L3VxlanReadyUpdate. It is extended below when the
      # PIM SSO toggle is on, and is only handed to the helper after
      # VxlanUCTunnelsReconcile has itself been registered.
      l3VxlanReadyUpdateDepStages = [ "VxlanUCTunnelsReconcile" ]

      if toggleStrataPimSsoEnabled():
         sh.registerStageDependency( "MrouteReconstructUpdate",
                                     [ "ModuleStatusUpdate" ] )
         sh.registerStage( "StrataL3", "MrouteReconstructUpdate", timeout=60 )

         l3MfibReadyDepStages = [ "L3FibReady", "MrouteReconstructUpdate" ]

         sh.registerStageDependency( "L3MfibReady", l3MfibReadyDepStages )
         sh.registerStage( "StrataL3", "L3MfibReady", timeout=600 )

         l3McastSmsReconcileDepStages = [ "MrouteReconstructUpdate",
                                          "L3UcastSmsReconcile", "L3MfibReady" ]

         # McastVpnEncapReconcile is registered above under this same toggle, so
         # it is only added as a dependency when the toggle is on.
         if toggleEvpnMulticastSSOEnabled():
            l3McastSmsReconcileDepStages.append( "McastVpnEncapReconcile" )
         sh.registerStageDependency( "L3McastSmsReconcile",
                                     l3McastSmsReconcileDepStages )
         sh.registerStage( "StrataL3", "L3McastSmsReconcile", timeout=60 )

         l3VxlanReadyUpdateDepStages.append( "L3McastSmsReconcile" )

      sh.registerStageDependency( "VxlanUCTunnelsReconcile",
                                  [ "L3UcastSmsReconcile" ] )
      sh.registerStage( "StrataL3", "VxlanUCTunnelsReconcile", timeout=120 )

      sh.registerStageDependency( "VlanReconcile",
                                  [ "L3UcastSmsReconcile" ] )
      sh.registerStage( "StrataVlanTopo", "VlanReconcile", timeout=120 )

      # Stage to notify that vxlan is ready on l3 agent
      sh.registerStageDependency( "L3VxlanReadyUpdate",
                                  l3VxlanReadyUpdateDepStages )
      sh.registerStage( "StrataL3", "L3VxlanReadyUpdate", timeout=60 )

      # Stage to notify that vxlan is ready on l2 agent
      sh.registerStageDependency( "L2VxlanReadyUpdate",
                                  [ "L3VxlanReadyUpdate" ] )
      sh.registerStage( "StrataL2", "L2VxlanReadyUpdate", timeout=60 )

      sh.registerStageDependency( "PrepareL3TableFastWrites",
                                  [ "L3VxlanReadyUpdate" ] )
      sh.registerStage( "StrataL3", "PrepareL3TableFastWrites", timeout=60 )

      # The stages from StrataMmuReconcile through StrataSliceVxlanReconcile are
      # registered here as dependencies only; this function attaches no agent
      # to them.
      sh.registerStageDependency( "StrataMmuReconcile", [ ] )

      sh.registerStageDependency( "PhyInitAllowed", [ "StrataMmuReconcile" ] )

      sh.registerStageDependency( "StrataSliceL2AgentReconcile",
                                  [ "PhyInitAllowed" ] )

      sh.registerStageDependency( "StrataMirroringPortEntryReconcile",
                                  [ "StrataSliceL2AgentReconcile" ] )

      sh.registerStageDependency( "StrataSliceL3AgentReconcile",
                                  [ "StrataSliceL2AgentReconcile",
                                    "L3UcastSmsReconcile" ] )

      sh.registerStageDependency( "StrataSliceVxlanReconcile",
                               [ "L2VxlanReadyUpdate",
                                 "StrataSliceL2AgentReconcile" ] )

      # Dependency list for PreQuiesceTraffic; one more entry is appended below
      # when the PIM SSO toggle is on.
      PreQuiesceTrafficDepStages = [ "ModuleStatusUpdate",
                                     "VlanReconcile",
                                     "L2VxlanReadyUpdate",
                                     "PrepareL3TableFastWrites",
                                     "StrataSliceL3AgentReconcile",
                                     "StrataMirroringPortEntryReconcile" ]

      if toggleStrataPimSsoEnabled():
         # Cause PreQuiesceTraffic to depend on L3McastSmsReconcile
         # if PIM SSO is supported
         PreQuiesceTrafficDepStages.append( "L3McastSmsReconcile" )

      sh.registerStageDependency( "PreQuiesceTraffic", PreQuiesceTrafficDepStages )

      sh.registerStage( "StrataL3", "PreQuiesceTraffic", timeout=60 )

      sh.registerStageDependency( "QuiesceTraffic",
                                  [ "PreQuiesceTraffic", ] )

      sh.registerStageDependency( "BulkDownloadStart",
                                  [ "QuiesceTraffic", ] )
      sh.registerStage( "StrataL3", "BulkDownloadStart", timeout=60 )
      sh.registerStage( "StrataL2", "BulkDownloadStart", timeout=60 )
      sh.registerStage( "StrataVlanTopo", "BulkDownloadStart", timeout=60 )

      sh.registerStageDependency( "BulkDownloadComplete",
                                  [ "BulkDownloadStart" ] )

      sh.registerStageDependency( "SliceStatusReady",
                                  [ "BulkDownloadComplete" ] )

      # On strata HwSyncWaitNormal is registered here, after SliceStatusReady.
      # CardPowerAgents below lists HwSyncWaitNormal as a dependency on every
      # platform, so the stage name has to exist on this path as well.
      sh.registerStage( "ForwardingAgent", "HwSyncWaitNormal",
                        [ "SliceStatusReady" ], complete=True )

   # Other agents that need to have their cell-dependent Sysdb state get
   # cleaned up should register events around the SysdbCleanup stage
   sh.registerStageDependency( "SysdbCleanup", [ "CriticalServices" ] )

   # Three agents take part in CardPowerAgents; each registration passes the
   # same dependency list.
   sh.registerStage( "NorCalCard", "CardPowerAgents",
                     [ "HwSyncWaitNormal", "Fru-Plugins",
                       "ModularSystem-switchover" ] )

   sh.registerStage( "Pca9555", "CardPowerAgents",
                     [ "HwSyncWaitNormal", "Fru-Plugins",
                       "ModularSystem-switchover" ] )

   sh.registerStage( "Ucd9012-system", "CardPowerAgents",
                     [ "HwSyncWaitNormal", "Fru-Plugins",
                       "ModularSystem-switchover" ] )

   # Non-strata counterpart of the strata-only section above.
   if platform != "strata":
      # HwSyncWaitForSlowCards can timeout if SSO is started too early. Thus, it is
      # marked as non-critical. However, if any of the descendants are critical,
      # HwSyncWaitForSlowCards will also be treated as critical, which can cause some
      # SSO ptest failures. Therefore, be sure to mark stages as non-critical when
      # necessary.
      sh.registerStage( "ForwardingAgent", "HwSyncWaitForSlowCards",
                        [ "CardPowerAgents", "Fru-Plugins" ], critical=False )

      sh.registerStage( "MactAgent", "InitMacTable", [ "HwSyncWaitForSlowCards" ],
                        timeout=65, critical=False )

      # L2RibUpdate is supposed to start after L3FibReady stage, but since there is
      # no clear stage definition for non-strata platforms, making L2RibUpdate
      # depend upon LFibSourceReady post which, the PI world actually declares
      # FibReady. Since the Evpn SSO support on non-Strata platforms is not yet
      # present, it is okay to have this non-exact dependency. The L2RibReady stage
      # registration is required for Bgp to complete its EOR processing. These stages
      # do not serve any other purpose for now.
      # NOTE(review): the call below makes L2RibUpdate depend on TunnelFibReady,
      # not LFibSourceReady (LFibSourceReady is registered further down with
      # L2RibReady as one of its dependencies) -- confirm which the comment
      # above intends.
      sh.registerStageDependency( "L2RibUpdate", [ "TunnelFibReady" ] )
      sh.registerStage( "Bgp", "L2RibUpdate", timeout=120,
                        completeNotRunnable=True )

      sh.registerStage( "L2Rib", "L2RibReconcile", [ "L2RibUpdate", "InitMacTable" ],
                        timeout=60, critical=False )

      sh.registerStage( "Bgp", "L2RibReady", [ "L2RibReconcile" ], timeout=120,
                        completeNotRunnable=True, critical=False )

      # EVPN Multicast SSO not supported on non-Strata platforms
      # The two Bgp stages are still registered here when the toggle is on.
      if toggleEvpnMulticastSSOEnabled():
         sh.registerStage( "Bgp", "EvpnMulticastReady", [ "L2RibReconcile" ],
                        timeout=120, completeNotRunnable=True, critical=False )
         sh.registerStage( "Bgp", "SpmsiUpdate", [ "TunnelFibReady" ],
                        timeout=120, completeNotRunnable=True, critical=False )

      # For information regarding staging between SandMact and L2Rib please refer to
      # AID7363
      sh.registerStage( "MactAgent", "L2RibReady", timeout=5, critical=False )

   # The LFibSourceReady stage and others dependent on it are all defined as
   # non-critical, since LFibSourceReady stage depends indirectly on
   # HwSyncWaitForSlowCards, which is defined as non-critical. See comment for
   # HwSyncWaitForSlowCards stage above. In case, any of these stages timeout,
   # then they might cause some traffic loss, but nothing worse than that. If you
   # are adding any dependent stages in this path of stage graph, but that needs to
   # be critical, please discuss with Sand team regarding HwSyncWaitForSlowCards
   # stage.
   # NOTE(review): HwSyncWaitForSlowCards is registered only on non-strata
   # platforms, while the three stages below are registered on every platform.
   sh.registerStage( "Bgp", "LFibSourceReady", [ "TunnelFibReady", "L2RibReady" ],
                     timeout=60, completeNotRunnable=True, critical=False )

   sh.registerStage( "Mpls", "SystemLfibReady", [ "LFibSourceReady" ],
                     timeout=60, completeNotRunnable=True, critical=False )

   sh.registerStage( "AleL3Agent", "PlatformLfibSync", [ "SystemLfibReady" ],
                     timeout=60, completeNotRunnable=True, critical=False )

   # Last step: flag the helper as initialized once every registration above has
   # been made.
   sh.initializedIs( True )

