# Copyright (c) 2020 Arista Networks, Inc.  All rights reserved.
# Arista Networks, Inc. Confidential and Proprietary.

# pylint: disable=consider-using-f-string

import io

import Afetch
import Agent
import BothTrace
import Cell
import CliCommon
import DeviceNameLib
import EapiClientLib
import glob
import hashlib
import json
import Logging
import os
import shutil
import SimpleConfigFile
import subprocess
import Tac
import Tracing
import ZtnLogger
import ZtnLogs
import ZtnSwitchConfigSaver
import ztn.manifest
import ztn.settings
import ztn.transaction
from ztn.parser.manifest_v3 import Manifest
from GenericReactor import GenericReactor
import AgentDirectory

__defaultTraceHandle__ = Tracing.Handle( "ZtnAgent" )

# TRACE LEVELS
# 0 : Errors
# 1 : Important stages of the agent init
# 5 : Reactors
# 8 : Function entry and other debug
v = BothTrace.Var
bt0 = BothTrace.tracef0
bt1 = BothTrace.tracef1
bt5 = BothTrace.tracef5
bt8 = BothTrace.tracef8
t8 = Tracing.trace8

FLASH_PATH = '/mnt/flash'
MIN_FREE_FLASH = 1024 * 30 # Min space to save startup-config and boot-config
BOOTED_IMAGE_SWI_PATH = FLASH_PATH + '/.boot-image.swi'
ZTN_BOOT_INFO_PATH = FLASH_PATH + '/ztn-boot-info'
PERSIST_ZTN_LOG_DIR = '/mnt/flash/debug/ztn' # used only for ptest
State = Tac.Type( 'Ztn::SyncState' )
HashAlgo = Tac.Type( "Ztn::HashAlgorithm" )

def name():
   """Return the name string 'Ztn'."""
   return 'Ztn'

def getRealPath( path ):
   """Relocate a path under '/mnt' into FILESYSTEM_ROOT, when that is set.

   If the FILESYSTEM_ROOT environment variable is non-empty, the leading
   '/mnt' component of path is replaced by its value. Only the leading
   component is rewritten, so a later '/mnt' inside the path (for example in a
   file or directory name) is preserved. Paths that are not under '/mnt', and
   all paths when FILESYSTEM_ROOT is unset or empty, are returned unchanged.
   """
   mountPoint = '/mnt'
   fsRoot = os.environ.get( 'FILESYSTEM_ROOT', '' )
   if fsRoot and ( path == mountPoint or path.startswith( mountPoint + '/' ) ):
      path = fsRoot + path[ len( mountPoint ) : ]
   return path

def persistZtnTracingLogInPtest():
   """Copy the Ztn agent logs into PERSIST_ZTN_LOG_DIR, if that directory exists.

   Each copy is named '<N>.<original basename>', where N is the number of
   entries found in the directory before any copy is made. Nothing is copied
   when the directory is absent.
   """
   t8( 'persistZtnTracingLogInPtest' )
   if not os.path.isdir( PERSIST_ZTN_LOG_DIR ):
      return
   # Taken once up front, so every log copied in this call shares one prefix
   prefix = len( os.listdir( PERSIST_ZTN_LOG_DIR ) )
   for logPath in glob.glob( '/var/log/agents/Ztn*' ):
      logName = os.path.basename( logPath )
      shutil.copy( logPath, f"{ PERSIST_ZTN_LOG_DIR }/{ prefix }.{ logName }" )

def computeSha256Hash( fname ):
   """Return the hexadecimal SHA-256 digest of the file named fname.

   The file is read in 10 MiB chunks so it is never held in memory whole.
   """
   chunkSize = 10 * 1024 * 1024
   digest = hashlib.sha256()
   with open( fname, mode='rb' ) as stream:
      # iter() with a sentinel stops at the first empty read, i.e. end of file
      for chunk in iter( lambda: stream.read( chunkSize ), b'' ):
         digest.update( chunk )
   return digest.hexdigest()

def swiNameFromUrl( url ):
   """Return whatever follows the last '/' in url (all of url if it has none)."""
   _, _, swiName = url.rpartition( '/' )
   return swiName

def installedImagePath():
   """Return the flash path of the image named by 'SWI' in boot-config.

   Only the last path component of the SWI entry is used; it is joined to
   FLASH_PATH and passed through getRealPath(). A missing SWI entry is treated
   as an empty name.
   """
   bootConfigPath = getRealPath( '/mnt/flash/boot-config' )
   bootConfig = SimpleConfigFile.SimpleConfigFileDict( bootConfigPath )
   swiName = swiNameFromUrl( bootConfig.get( 'SWI', '' ) )
   return getRealPath( FLASH_PATH + '/' + swiName )

class GetRequest:
   """Keys under which ZtnConfigSyncer.httpRequests stores its GET requests."""
   SWI = 'swi'
   CONFIG = 'config'

class ZtnConfigSyncer:
   """ This class is responsible for the init and steady cycle of ZTN in which
   the following tasks are performed:
   1. Download manifest from the configured controllers
   2. Download startup-config using the URL provided in the manifest. Append
      the mandatory config to this config. Replace running-config with this
      aggregate config
   3. Download SWI using the URL provided in the manifest. Install the SWI and
      reload the switch
   4. If any of the steps 1-3 fail, go to step 1

   Syslogs are made based on the cycle of the agent. If the agent is in the
   initial handshake phase, then the following logs are sent:
      DMF_CONTROLLER_HANDSHAKE_INIT
      DMF_CONTROLLER_DOWNLOAD_FAILED
      DMF_CONTROLLER_HANDSHAKE_FAILED
      DMF_CONTROLLER_HANDSHAKE_COMPLETE

   If the agent is in the steady-state cycle, where it may receive a sync request
   from the controller, the following logs are sent:
      DMF_CONTROLLER_SYNC_START
      DMF_CONTROLLER_DOWNLOAD_FAILED
      DMF_CONTROLLER_SYNC_FAILED
      DMF_CONTROLLER_SYNC_TIMEOUT
      DMF_CONTROLLER_SYNC_COMPLETE
   """
   TRANSACT_ATTEMPTS = 1
   RETRY_TIME = 10
   ZTN_VERSION_NUM = 3
   PLATFORM_TYPE = 'eos'
   SSL = True
   CONTROLLER_PORT = 8843
   RESYNC_TIMEOUT = 300
   STEADY_CYCLE = 30    # The steady Cycle Interval

   def __init__( self, hwEntMib, config, status, logger, sysname, qosSliceHwStatus ):
      """Store the collaborators and create the (initially idle) timers.

      Nothing is synced here: both timers start with timeMin set to
      Tac.endOfTime, and transactionMgr stays None until doInit() creates it.
      """
      # Collaborators handed in by the caller
      self.hwEntMib = hwEntMib
      self.config = config
      self.status = status
      self.logger = logger
      self.sysname = sysname
      self.qosSliceHwStatus = qosSliceHwStatus

      # The system is treated as modular when the entity MIB has a chassis
      isModular = hwEntMib.chassis is not None
      self.modular = isModular
      self.configSaver = ZtnSwitchConfigSaver.SwitchConfigSaver( modular=isModular )

      bootInfoPath = getRealPath( ZTN_BOOT_INFO_PATH )
      self.bootInfo = SimpleConfigFile.SimpleConfigFileDict(
            bootInfoPath, createIfMissing=True, autoSync=True )

      # Per-cycle bookkeeping
      self.primaryController = None
      self.transactionMgr = None
      self.httpRequests = {}
      self.resyncInProgress = False
      # For testing
      self.manifestRetries = 0

      self.manifestRetryTimer = Tac.ClockNotifiee(
            self.downloadManifest, timeMin=Tac.endOfTime )
      self.resyncTimer = Tac.ClockNotifiee(
            self.handleResyncComplete, timeMin=Tac.endOfTime )

   def doInit( self ):
      """Publish platform and system MAC to status and build the transaction manager.

      The platform string is '<arch>-<lower-cased model>-<PLATFORM_TYPE>'. The
      source interface comes from config.sourceIntf; when that is empty,
      Management0 (modular) or Management1 (fixed system) is used and the
      config saver's sourceIntf is cleared.
      """
      self.status.syncStages.reset()

      # platform will be in the form of arch-model-eos e.g. x86-64-7280QR-C72-eos
      arch = Tac.Type( "EosUtils::SwiVersion" ).parse( '' ).swiArch
      modelName = self.hwEntMib.root.modelName
      t8( f"arch={arch} model={modelName}" )
      # The model name is registered in lower case with DMF controller
      platform = "-".join( [ arch, modelName.lower(), self.PLATFORM_TYPE ] )
      bt1( v( 'platform={!r} systemMac={!r}'.format(
         platform, self.hwEntMib.systemMacAddr ) ) )
      self.status.platform = platform
      self.status.systemMac = self.hwEntMib.systemMacAddr

      configuredIntf = self.config.sourceIntf
      if configuredIntf == "":
         # Nothing configured: fall back to the default management interface
         eosIntf = "Management0" if self.modular else "Management1"
         self.configSaver.sourceIntf = None
      else:
         eosIntf = configuredIntf
         self.configSaver.sourceIntf = configuredIntf

      kernelIntf = DeviceNameLib.eosIntfToKernelIntf( eosIntf )
      t8( "ZtnmTransactionManager source intf:", kernelIntf )
      self.transactionMgr = ztn.transaction.ZtnmTransactionManager(
            self.logger, self.status.systemMac, kernelIntf, platform )

   def handleResyncComplete( self ):
      if not self.resyncInProgress:
         return
      if self.status.syncState == State.complete:
         Logging.log( ZtnLogs.DMF_CONTROLLER_SYNC_COMPLETE )
         self.logger.addMsg( "Resync complete" )
      else:
         Logging.log( ZtnLogs.DMF_CONTROLLER_SYNC_TIMEOUT )
         self.logger.addMsg( "Resync timed out" )
      self.resyncInProgress = False

   def isSyncInProgress( self ):
      """Return True while status.syncState is one of the in-flight sync states."""
      inFlightStates = (
         State.configDownloadInProgress,
         State.configDownloaded,
         State.configSaveComplete,
         State.swiDownloadInProgress,
         State.swiDownloaded,
         State.swiInstallComplete,
         State.rebooting,
      )
      return self.status.syncState in inFlightStates

   def doResync( self ):
      """Start a new sync cycle on request, unless one is already in flight.

      Arms the resync timer RESYNC_TIMEOUT seconds ahead, resets the sync
      stages, cleans up per-cycle state without resetting status, and
      downloads the manifest again.
      """
      if self.isSyncInProgress():
         self.logger.addMsg( "Ignored resync request" )
         return

      # When a resync is initiated, the existing state shouldn't be reset until
      # an update is available in the new sync cycle
      Logging.log( ZtnLogs.DMF_CONTROLLER_SYNC_START )
      self.logger.addMsg( "Processing resync request" )
      # handleResyncComplete() runs when this timer expires
      self.resyncTimer.timeMin = Tac.now() + self.RESYNC_TIMEOUT
      self.status.syncStages.reset()
      self.resyncInProgress = True
      self.doCleanup( resetState=False )
      self.downloadManifest()

   def doAddController( self, ip ):
      if not self.transactionMgr:
         return
      ip = str( ip )
      self.transactionMgr.add_static_addresses( ip )
      if not self.status.syncStages.complete:
         self.logger.addMsg(
               "Attempting manifest download from controller %s" % ip )
         self.downloadManifest()

   def doDelController( self, ip ):
      if not self.transactionMgr:
         return
      ip = str( ip )
      try:
         self.transactionMgr.static_addresses.remove( ip )
      except ValueError:
         assert False, 'Invalid IP deleted'
      if self.primaryController == ip:
         # Reload manifest when primary controller is removed
         self.downloadManifest()

   def syslogFailure( self, *args ):
      """Syslog a failure as SYNC_FAILED during a resync, else HANDSHAKE_FAILED.

      args are passed through to Logging.log() after the log id.
      """
      logId = ( ZtnLogs.DMF_CONTROLLER_SYNC_FAILED if self.resyncInProgress
                else ZtnLogs.DMF_CONTROLLER_HANDSHAKE_FAILED )
      Logging.log( logId, *args )

   def downloadManifest( self ):
      """Fetch the manifest from a configured controller and process it.

      When the sync stages are already complete this only acts as the steady
      cycle guard: if the last manifest timestamp is younger than three steady
      cycles another check is scheduled, otherwise a resync is triggered.
      Otherwise a transaction is attempted; on failure the state becomes
      manifestDownloadFailed and a retry is scheduled, on success the result
      is handed to processManifest().
      """
      bt5()
      if self.status.syncStages.complete:
         manifestAge = Tac.utcNow() - self.status.manifestTs
         if manifestAge < self.STEADY_CYCLE * 3:
            bt8( 'Steady cycle guard, return' )
            self.scheduleManifestDownload()
         else:
            bt0( 'Steady cycle is stuck. Trigger a resync' )
            self.doResync()
         return

      self.status.syncState = State.init
      errMsg = "Downloading manifest failed"
      try:
         result, controller, method = self.transactionMgr.transact(
               self.TRANSACT_ATTEMPTS, static=True, mdns=False, peer=False )
      except ValueError as e:
         bt0( v( f"{errMsg}: {e}" ) )
         # Raised when basic checks fail in ztn module.
         # Maybe it's better to just assert here.
         self.status.syncState = State.manifestDownloadFailed
         self.scheduleManifestDownload()
         return

      if not method:
         # Failed after multiple attempts. Keep trying
         Logging.log( ZtnLogs.DMF_CONTROLLER_DOWNLOAD_FAILED, "manifest" )
         self.logger.addMsg( errMsg )
         self.status.syncState = State.manifestDownloadFailed
         self.scheduleManifestDownload()
         return

      self.status.manifestTs = Tac.utcNow()
      self.primaryController = controller
      self.processManifest( result )

   def scheduleManifestDownload( self ):
      """Arm the manifest timer to fire RETRY_TIME seconds from now.

      Also bumps manifestRetries, a counter kept for tests.
      """
      self.manifestRetries += 1
      # downloadManifest() is the timer's callback (see __init__)
      self.manifestRetryTimer.timeMin = Tac.now() + self.RETRY_TIME

   @staticmethod
   def _isValidManifest( manifest ):
      validSwiParams = bool( manifest.swiUrl and manifest.swiChecksum )
      validConfigParams = bool( manifest.configUrl and manifest.configChecksum )
      return validSwiParams and validConfigParams

   def _handleInvalidManifest( self, msg ):
      """Report an unusable manifest and schedule another download.

      msg is traced and added to the logger as given; the syslog gets it
      lower-cased. The sync state becomes State.invalidManifest.
      """
      bt0( v( msg ) )
      self.syslogFailure( msg.lower() )
      self.status.syncState = State.invalidManifest
      self.logger.addMsg( msg )
      self.scheduleManifestDownload()

   def processManifest( self, manifest ):
      """Validate a downloaded manifest and start fetching what it points to.

      An empty, unparsable or incomplete manifest is reported through
      _handleInvalidManifest() and processing stops. Otherwise this decides
      whether the SWI changed (queuing an SWI request if so), always queues a
      config request whose callback is processZtnConfig(), saves the manifest
      in status, starts the queued requests and, when an SWI request was
      queued, calls doEosUpgrade(). The request table is cleared at the end.
      """
      bt5()
      self.status.manifestTs = Tac.utcNow()

      # Free minimum disk space to process the manifest
      self.makeDiskSpaceForImage( MIN_FREE_FLASH )

      if not manifest:
         self._handleInvalidManifest( "Empty manifest file received" )
         return
      # Convert all manifest params from unicode to strings in one shot
      manifestJsonStr = json.dumps( manifest )
      manifest = json.loads( manifestJsonStr )
      # Convert into Manifest object for ease of access to attributes
      try:
         manifest = Manifest.fromJson( manifest )
      except ValueError:
         self._handleInvalidManifest( "Parsing manifest failed" )
         return

      if not self._isValidManifest( manifest ):
         self._handleInvalidManifest( "Manifest is incomplete" )
         return

      # Load the previously saved manifest, if any, so that ztn-config changes
      # can be detected below.
      oldManifest = None
      if self.status.manifest:
         oldManifest = Manifest.fromJson(
                        json.loads( self.status.manifest ) )

      # Do SWI change check first because there's no value in loading the new
      # config when the dut is going to be reloaded due to the new image
      bootedSwiDigest = self.status.bootSwiDigest.get(
                           manifest.checksumAlgorithm, "" )

      # If it's the first time a switch is upgraded to a SWIM image,
      # there won't be a ztn-boot-info file
      noBootInfo = not list( self.bootInfo )
      if noBootInfo:
         # The SWIM optimization enabled switch platforms will reboot twice if
         # it's their first upgrade. This can be avoided on other platforms by
         # simply comparing the booted image with manifest image hashes
         swiChanged = bootedSwiDigest != manifest.swiChecksum
         bt8( "SWI changed:", swiChanged )
      else:
         # Check the hash of booted swi in the prior life, if any
         preRebootManifestSwiDigest = self.bootInfo.get( "manifestSwiChecksum", "" )
         # At this point, the config file isn't updated with the booted image's
         # digest yet.
         preRebootSwiDigest = self.bootInfo.get( "bootImageChecksum", "" )
         # If booted swi is a shrunk image, its hash will be different from the
         # manifest's swi hash. So, there are two cases here:
         # 1. New image is downloaded manually and rebooted
         #    In this case, the old and new manifest swi-hash will be same but the
         #    boot images will be different before and after upgrade
         # 2. A new image is available on the controller
         #    In this case, the boot image will be same before and after reboot
         #    but the manifest swi hashes will be different
         # Making hash comparison conditional on swi optimization enabled will
         # over complicate it. So, we'll use ztn-boot-info always.
         t8( ','.join( ( preRebootManifestSwiDigest, manifest.swiChecksum,
             preRebootSwiDigest, bootedSwiDigest ) ) )
         swiChanged = preRebootManifestSwiDigest != manifest.swiChecksum
         bt8( "Controller SWI changed:", swiChanged )
         if not swiChanged:
            # Corner case: When a new image is installed manually after handshake is
            # complete, the switch should be restored to the controller provided
            # image
            swiChanged = preRebootSwiDigest != bootedSwiDigest
            if self.config.skipReboot:
               # In tests, if skip-reboot is configured, the booted image might never
               # be same as the installed image
               swiChanged = False
            bt8( "Installed SWI changed:", swiChanged )

      self.status.swiUrl = manifest.swiUrl
      if swiChanged:
         # Request for image from the controller only for content meta data
         # which is in turn used for flash clean up. The request for image
         # download will be made via 'install' command later.
         swiFileName = swiNameFromUrl( manifest.swiUrl )
         localSwiPath = getRealPath( FLASH_PATH + '/%s' % swiFileName )
         request = Afetch.SimpleHttpGetRequest(
                      manifest.swiUrl,
                      outputPath=localSwiPath,
                      stream=True )
         self.httpRequests[ GetRequest.SWI ] = request
      else:
         bt5( 'Not downloading the image because switch is already running '
              'the same image' )
         self.status.syncStages.swiInstalled = True
         self.status.swiManifestChecksum = manifest.swiChecksum
         self.bootInfo[ 'manifestSwiChecksum' ] = manifest.swiChecksum
         self.bootInfo[ 'bootImageChecksum' ] = bootedSwiDigest

      # Config will be downloaded unconditionally because we can't say if the
      # running-config is different than that after the last sync up. We can
      # find it out by running an exec command but we won't do that because
      # that's the controller's job. This also addresses the case of
      # config replace failing at the last attempt of sync up.
      # configChanged is therefore only traced, never acted upon.
      configChanged = bool( not oldManifest or
                            oldManifest.configChecksum != manifest.configChecksum )
      t8( 'config changed:', configChanged )
      self.status.configUrl = manifest.configUrl
      request = Afetch.SimpleHttpGetRequest(
                  manifest.configUrl,
                  callback=self.processZtnConfig )
      self.httpRequests[ GetRequest.CONFIG ] = request

      # Save the manifest and set the state
      self.status.manifest = manifestJsonStr
      self.status.manifestConfigChecksum[ manifest.checksumAlgorithm ] = \
            manifest.configChecksum

      self.status.syncState = State.configDownloadInProgress

      # Start fetching
      for request in self.httpRequests.values():
         request.start()

      # Make space on flash if needed
      if GetRequest.SWI in self.httpRequests:
         self.doEosUpgrade()

      self.httpRequests.clear()

   @staticmethod
   def runCliCommands( cmds ):
      """
      Run CLI commands through eAPI ("runCmds", text format) and collect output.

      @cmds : List of CLI commands; entries equal to '!' are dropped

      Returns: ( errMsg, [ output, output, ... ] )
         On failure errMsg is the eAPI error message and the list is None.
         On success errMsg is None and each output is stripped of surrounding
         whitespace.

      Tracing log:
      # CMD
      OUTPUT
      warnings: WARNINGS
      # CMD
      OUTPUT
      """
      cmds = [ cmd for cmd in cmds if cmd != '!' ]
      t8( 'ZtnConfigSyncer::runCliCommands' )
      rpcParams = { "format": "text", "cmds": cmds, "version": 1 }
      request = json.dumps( {
         "jsonrpc": "2.0",
         "method": "runCmds",
         "params": rpcParams,
         "id": "Ztn"
      } )

      with EapiClientLib.EapiClient( sysname=Tac.sysname(),
            disableAaa=True, privLevel=CliCommon.MAX_PRIV_LVL ) as ctx:
         resultGenerator, _ = ctx.sendRpcRequest( request )
         # Gather the response pieces; a None piece marks the end
         pieces = []
         for piece in resultGenerator:
            if piece is None:
               break
            pieces.append( piece )
         rawResponse = b''.join( pieces ).decode()

      reply = json.loads( rawResponse )
      error = reply.get( 'error' )
      if error:
         # Trace whatever output the commands produced before the failure
         for cmd, datum in zip( cmds, error[ 'data' ] ):
            t8( '#', cmd )
            failedOutput = datum[ 'output' ].strip()
            if failedOutput:
               t8( failedOutput )
         return error[ 'message' ], None

      outputs = []
      for cmd, result in zip( cmds, reply[ 'result' ] ):
         t8( '#', cmd )
         cmdOutput = result[ 'output' ].strip()
         outputs.append( cmdOutput )
         if cmdOutput:
            t8( cmdOutput )
         warningText = '\n'.join( result.get( 'warnings', [] ) )
         if warningText:
            t8( 'warnings:', warningText )
      return None, outputs

   def maybeSetSyncComplete( self ):
      if self.status.syncStages.complete:
         self.status.syncState = State.complete
         # Schedule the manifest download to check on the steady cycle
         self.scheduleManifestDownload()
         # Give green signal to start DmfIndigo agent
         if not self.status.handshakeComplete:
            self.status.handshakeComplete = True
            self.logger.addMsg( "ZTN handshake complete" )
            Logging.log( ZtnLogs.DMF_CONTROLLER_HANDSHAKE_COMPLETE )
         self.handleResyncComplete()

   @staticmethod
   def processConfigCmds( cmds ):
      """
      EapiClientLib requires converting multiline command
      [ 'banner login', 'hi there', 'EOF' ]
      to
      [ { 'cmd': 'banner login', 'input': 'hi there\nEOF\n' } ]
      """
      convertedCmds = []
      for cmd in cmds:
         if cmd.startswith( 'banner ' ):
            convertedCmds.append( { 'cmd': cmd, 'input': '' } )
         elif ( len( convertedCmds ) != 0 and isinstance( convertedCmds[ -1 ], dict )
                and not convertedCmds[ -1 ][ 'input' ].endswith( 'EOF\n' ) ):
            convertedCmds[ -1 ][ 'input' ] += cmd + '\n'
         else:
            convertedCmds.append( cmd )
      return convertedCmds

   def getMgmtRedundantIntf( self, ztnConfig ):
      res = list( filter(
         lambda conf: '! redundant management interface' in conf, ztnConfig ) )
      if not res:
         return ""
      return res[ 0 ].split()[ -1 ]

   def processZtnConfig( self, response ):
      """Apply the startup-config downloaded from the controller.

      Callback of the config GET request; 'response' is of type
      'requests.models.Response'. The steps are:
      1. Reject a failed download or a config with no content.
      2. Work out whether the new config needs a reboot to take effect.
      3. Build a config session: clean-config, the controller config and the
         locally generated connectivity config.
      4. Apply extra hardware settings, commit and save to startup-config.
      5. Reload if required; otherwise record the running-config digest and
         mark the config stage as saved.
      Every failure after the download updates status.syncState (and usually
      status.lastKnownError) and schedules another manifest download.
      """
      bt5()
      if not response:
         # Downloading ZTN config failed. Could be incorrect URL
         self.status.syncState = State.configDownloadFailed
         Logging.log( ZtnLogs.DMF_CONTROLLER_DOWNLOAD_FAILED, "config" )
         return

      self.logger.addMsg( "Startup-config download complete" )
      t8( "ztn-config =", response.text )
      self.status.syncState = State.configDownloaded
      ztnConfig = response.text.split( "\n" )
      # str.split() never returns an empty list (an empty body yields [ '' ]),
      # so emptiness has to be judged on the content of the lines.
      if not any( line.strip() for line in ztnConfig ):
         bt0( "Received empty ZTN config" )
         self.status.syncState = State.invalidConfig
         self.syslogFailure( "configuration received is empty" )
         self.scheduleManifestDownload()
         return

      # these configs require reboot to take effect
      routingConfig = 'service routing protocols model multi-agent'
      envConfig = "agent KernelFib environment KERNELFIB_PROGRAM_ALL_ECMP='true'"
      errMsg, runningConfig = self.runCliCommands( [
         r'show running-config section ^service\srouting\sprotocols\smodel',
         r'show running-config section ^agent\sKernelFib\senvironment' ] )
      if errMsg:
         bt5( 'failed to get running-config section service routing protocols' )
         rebootRequired = False
      else:
         # runningConfig holds one stripped output per show command above
         rebootRequired = (
            ( routingConfig not in runningConfig and routingConfig in ztnConfig ) or
            ( envConfig not in runningConfig and envConfig in ztnConfig ) )

      # Process the config for special commands
      ztnConfig = self.processConfigCmds( ztnConfig )

      # In EosStartupConfigUtils.java, controller sends 'class copp-system-mirroring'
      # CLI command to every Sand switch.
      # During switch boot-up, we may encounter this error:
      # '% Static class copp-system-mirroring is not supported on this platform'
      # which is caused by
      # src/Qos/eos-trunk/CliPlugin/QosCliServicePolicy.py@rev18#line-3337
      # Here we emit a more sensible error message instead of 'invalid config'
      if 'class copp-system-mirroring' in ztnConfig:
         for sliceHwStatus in self.qosSliceHwStatus.values():
            if ( Tac.Type( "Qos::ClassMapCpStaticType" ).cmapCpStaticMirroring
                 in sliceHwStatus.coppStaticClass ):
               self.status.lastKnownError = ''
               break
         else:
            # No slice reports the static mirroring class yet
            errMsg = "QoS hardware not ready to apply controller configuration"
            bt0( errMsg )
            self.status.lastKnownError = errMsg
            self.status.syncState = State.configSaveError
            self.logger.addMsg( errMsg )
            self.scheduleManifestDownload()
            return

      # Start a new Cli session, run clean config, load the new config
      # save config to startup-config
      configSessionName = 'ztn-%d' % Tac.utcNow()
      sessionCmds = [ 'enable' ]
      sessionCmds.append( 'configure session %s' % configSessionName )
      cmds = sessionCmds[ : ]
      cmds.append( 'rollback clean-config' )
      cmds += ztnConfig
      if self.config.skipReboot:
         cmds += [ 'management dmf', 'handshake reload disabled' ]

      # Save the Ma1 and route config once and keep it forever
      saveConfig, errMsg = self.configSaver.generateConfig(
            requestedMgmtRedundantIntf=self.getMgmtRedundantIntf( ztnConfig ) )
      bt5( 'intfConfig:', v( saveConfig ) )

      if errMsg:
         bt5( 'generateConfig: errMsg', v( errMsg ) )
         self.status.lastKnownError = errMsg
         self.status.syncState = State.configSaveError
         self.syslogFailure( "generateConfig on the switch failed: " + errMsg )
         self.scheduleManifestDownload()
         return

      if saveConfig:
         self.status.nwConnectivityConfig = "\n".join( saveConfig )
      elif self.status.nwConnectivityConfig:
         # Nothing generated this time: reuse what was saved earlier
         saveConfig = self.status.nwConnectivityConfig.split( '\n' )
      else:
         bt0( 'Something went seriously wrong. Cross-fingers on connectivity.' )
         self.status.lastKnownError = "Querying user configuration failed"
         self.status.syncState = State.configSaveError
         self.syslogFailure( "applying the config received on the switch failed" )
         self.scheduleManifestDownload()
         return

      cmds += saveConfig
      errMsg, _ = self.runCliCommands( cmds )
      if errMsg:
         self.syslogFailure( "configuration received from controller is invalid" )
         bt0( v( errMsg ) )
         self.logger.addMsg( "Config replace failed. (%s)" % errMsg )
         self.status.lastKnownError = errMsg
         self.status.syncState = State.invalidConfig
         # Abort the config session
         self.runCliCommands( sessionCmds + [ 'abort' ] )
         bt5( 'Aborted the config session' )
         self.scheduleManifestDownload()
         return

      # Add ip hardware fib optimize prefix-length 32 config
      # Restart and wait for SandL3Unicast agent after applying the
      # config
      mgmtRedundancyEnabled = bool( self.getMgmtRedundantIntf( ztnConfig ) )
      if mgmtRedundancyEnabled:
         self.runCliCommands( sessionCmds +
               [ "ip hardware fib optimize prefix-length 32" ] )

      # Add hardware forwarding system profile config, see cl/28504209
      # wait for SandFapNi warmup before applying hardware forwarding profile
      # Workaround for BUG761323
      # ZTN will restart SandFapNi agent on Jericho2 with command
      # "hardware forwarding system profile system-profile-tap-aggregation".
      # And sometimes SandFapNi agent fails to clean up copp status path
      # and cause SandCounter fails to allocate resources for
      # hardware counter features which are used in DmfIndigo.
      for agentName, _ in AgentDirectory.agents( self.sysname ):
         if agentName.startswith( 'SandFapNi' ):
            self.status.lastKnownError = f"wait for { agentName } warmup failed"
            # wait-for-warmup blocks until agent warmup
            errMsg, _ = self.runCliCommands(
                  [ 'enable', f'wait-for-warmup { agentName }' ] )
            if errMsg:
               bt0( v( errMsg ) )
            else:
               self.status.lastKnownError = ''
      self.runCliCommands( sessionCmds +
            [ "hardware forwarding system profile system-profile-tap-aggregation" ] )

      errMsg, _ = self.runCliCommands( sessionCmds +
            [ "commit", "copy running-config startup-config" ] )

      if errMsg:
         bt0( v( errMsg ) )
         self.syslogFailure( "applying configuration failed" )
         self.status.lastKnownError = errMsg
         self.status.syncState = State.configSaveError
         self.logger.addMsg( "Config replace failed. (%s)" % errMsg )
         self.scheduleManifestDownload()
         return
      elif rebootRequired:
         persistZtnTracingLogInPtest()
         self.runCliCommands( [ 'reload all now' ] )
         return # unreachable

      errMsg, outputs = self.runCliCommands( [ 'show running-config digest' ] )
      if errMsg:
         digest = ''
      else:
         digest = outputs[ 0 ]
      if self.status.runningCfgDigest.get( HashAlgo.sha1, None ) != digest:
         # We also need to restart SandL3Unicast agent for the
         # ip hardware fib optimize prefix-length 32 config
         # to take effect and wait for its warmup.
         for agentName, _ in AgentDirectory.agents( self.sysname ):
            if agentName.startswith( 'SandL3Unicast' ):
               self.runCliCommands( [ 'enable', f'agent { agentName } terminate' ] )
               self.status.lastKnownError = f"wait for { agentName } warmup failed"
               # wait-for-warmup blocks until agent warmup
               errMsg, _ = self.runCliCommands(
                     [ 'enable', f'wait-for-warmup { agentName }' ] )
               if errMsg:
                  bt0( v( errMsg ) )
               else:
                  self.status.lastKnownError = ''

         self.status.runningCfgDigest[ HashAlgo.sha1 ] = digest

      self.status.syncState = State.configSaveComplete
      self.status.syncStages.configSaved = True
      self.maybeSetSyncComplete()

   def doEosUpgrade( self ):
      """Install the SWI named in the saved manifest and reload if needed.

      The queued SWI GET request is stopped right away; only its response
      headers are used, to size the flash cleanup. The image itself is fetched
      by the 'install' CLI command. After a successful install the manifest
      checksum and the installed image's SHA-256 are written to ztn-boot-info,
      and the switch is reloaded unless it already runs that image or
      skip-reboot is configured.
      """
      bt5()
      swiReq = self.httpRequests[ GetRequest.SWI ]
      # Stop the request and let 'install' command take charge
      swiReq.stop()
      if swiReq.response is not None:
         swiFileSize = int( swiReq.response.headers[ 'Content-Length' ] )
         t8( 'SWI image size: ', swiFileSize )
         if not self.makeDiskSpaceForImage( swiFileSize ):
            errMsg = "insufficient flash space. Need to cleanup manually"
            bt0( v( errMsg ) )
            # Update status
            self.status.lastKnownError = errMsg
            self.status.syncState = State.swiDownloadFailed
            self.scheduleManifestDownload()
            return
      else:
         # No response at all: the image request itself failed
         self.logger.addMsg( "Image download failed" )
         self.status.syncState = State.swiDownloadFailed
         Logging.log( ZtnLogs.DMF_CONTROLLER_DOWNLOAD_FAILED, "image" )
         return

      manifest = Manifest.fromJson(
                        json.loads( self.status.manifest ) )
      swiName = swiNameFromUrl( manifest.swiUrl )
      # Remember the current ztn-boot-info values so they can be restored if
      # the install or the reload fails
      preRebootManifestSwiDigest = self.bootInfo.get( "manifestSwiChecksum", "" )
      preRebootSwiDigest = self.bootInfo.get( "bootImageChecksum", "" )
      # If SWI optimization is enabled, the hash of the downloaded SWI should
      # be added to the ztn-boot-info file so that the comparison can be done
      # accurately
      bt5( "Downloading and installing the new SWI" )
      self.status.syncState = State.swiDownloadInProgress
      # Rewrite 'scheme://host/...' as 'scheme:host/...' for the install command
      src = swiReq.url.replace( '://', ':' )
      dest = 'flash:' + swiName
      cmds = [ 'enable',
               f'install source {src} destination {dest}' ]
      errMsg, _ = self.runCliCommands( cmds )
      if errMsg:
         bt0( v( errMsg ) )
         self.logger.addMsg( "Invalid image. (%s)" % errMsg )
         # Revert changes made to ztn-boot-info
         self.bootInfo[ 'manifestSwiChecksum' ] = preRebootManifestSwiDigest
         self.bootInfo[ 'bootImageChecksum' ] = preRebootSwiDigest
         # Update status
         self.status.lastKnownError = errMsg
         self.status.syncState = State.invalidSwi
         self.scheduleManifestDownload()
      else:
         self.status.syncState = State.swiDownloaded
         self.logger.addMsg( "Image download complete (%s)" % swiName )
         # The switch will reboot shortly but we'll set the state anyways
         self.status.syncState = State.swiInstallComplete
         self.status.syncStages.swiInstalled = True

         # Compute the hash of the new image and write to the ztn-boot-info file
         swiPath = installedImagePath()
         installedSwiHash = computeSha256Hash( swiPath )

         # Save the manifest and booted image digest on flash so that it can be
         # used to make upgrade decision after/if the switch reboots
         self.bootInfo[ 'manifestSwiChecksum' ] = manifest.swiChecksum
         self.bootInfo[ 'bootImageChecksum' ] = installedSwiHash

         # Reboot
         if installedSwiHash == self.status.bootSwiDigest.get( HashAlgo.sha256, "" ):
            self.logger.addMsg(
                  "Skipping reload since switch is already running the same image" )
            self.status.syncStages.swiInstalled = True
            self.status.swiManifestChecksum = manifest.swiChecksum
            self.maybeSetSyncComplete()
         elif not self.config.skipReboot:
            bt5( "Rebooting the dut" )
            persistZtnTracingLogInPtest()
            cmds = [ 'enable', 'reload all now' ]
            errMsg, _ = self.runCliCommands( cmds )
            self.logger.addMsg( "Rebooting the switch to upgrade" )
            if errMsg:
               bt0( v( errMsg ) )
               self.logger.addMsg( "Reloading failed. (%s)" % errMsg )
               # Revert changes made to ztn-boot-info
               self.bootInfo[ 'manifestSwiChecksum' ] = preRebootManifestSwiDigest
               self.bootInfo[ 'bootImageChecksum' ] = preRebootSwiDigest
            else:
               self.status.syncState = State.rebooting
         else:
            t8( "handshake reload disabled is configured. Skipped rebooting." )
            self.status.syncStages.swiInstalled = True
            self.status.swiManifestChecksum = manifest.swiChecksum
            self.maybeSetSyncComplete()

   @staticmethod
   def makeDiskSpaceForImage( freeSpaceNeeded ):
      """Try to make freeSpaceNeeded bytes (plus 5% headroom) free on flash.

      If flash is short of space, leftover '*.swi_tmp*' files are removed and
      then SWI images in /mnt/flash are deleted, oldest modification time
      first, until enough space is free. The image named in boot-config is
      never deleted when freeSpaceNeeded is at most MIN_FREE_FLASH; otherwise
      it is deleted last.

      @freeSpaceNeeded : Number of bytes required

      Returns: True when enough space is (or has been made) available, or when
      /mnt/flash cannot be stat'ed (treated as a test environment). False,
      after logging DMF_CONTROLLER_FLASH_FULL, when deleting every candidate
      image still does not free enough.
      """
      saveBootImage = bool( freeSpaceNeeded <= MIN_FREE_FLASH )
      # workaround for /src/StorageDevices/ReloadPolicyPlugin/ImageSizeCheck.py
      freeSpaceNeeded *= 1.05

      try:
         fileSysInfo = os.statvfs( '/mnt/flash' )
         freeSpace = fileSysInfo.f_bavail * fileSysInfo.f_bsize
      except OSError:
         # Test environment. no flash mounted
         t8( 'makeDiskSpaceForImage: Test environment. No mounted flash system' )
         return True

      bt5( 'space-needed', v( freeSpaceNeeded ), 'available-space', v( freeSpace ) )
      if freeSpaceNeeded <= freeSpace:
         # Flash has enough free space
         return True

      # Get the installed swi image name
      bootConfigFileName = os.environ.get(
         'BOOT_CONFIG_LOCATION', "flash:/boot-config" )
      bootConfigFileName = bootConfigFileName.replace( "flash:/", "/mnt/flash/" )
      bootConfig = SimpleConfigFile.SimpleConfigFileDict( bootConfigFileName )
      # boot-config may have no SWI entry; treat that as "no installed image"
      # instead of failing on None
      installedSwi = bootConfig.get( 'SWI', '' )
      installedSwi = installedSwi.replace( "flash:/", "/mnt/flash/" )
      bt5( 'Installed SWI', v( installedSwi ) )

      # Remove all tmp swi files on flash before downloading the new one
      tmpSwiFiles = glob.glob( '/mnt/flash/*.swi_tmp*' )
      for swi in tmpSwiFiles:
         os.remove( swi )
      # Remove swi files on flash before downloading the new one
      files = glob.glob( '/mnt/flash/*' )
      files = [ f for f in files if os.path.isfile( f ) ]
      swiFiles = []
      bt5( 'Flash files', v( files ) )
      for f in files:
         bt5( 'Examine file', v( f ) )
         try:
            # A file counts as an SWI when 'swi info' prints a SWI_VERSION
            fStatus = subprocess.run(
               [ "swi", "info", f ], capture_output=True, check=True, timeout=10 )
            bt5( 'swi info', v( fStatus.stdout ) )
            # stdout is bytes here (no text mode requested)
            if b"SWI_VERSION=" in fStatus.stdout:
               bt5( 'Identified SWI file', v( f ) )
               swiFiles.append( f )
         except subprocess.CalledProcessError:
            bt5( 'Error examining file', v( f ) )
         except subprocess.TimeoutExpired:
            bt5( 'Timeout examining file', v( f ) )
      swiFiles = sorted( swiFiles, key=os.path.getmtime )
      bt5( 'SWI files', v( swiFiles ) )
      if installedSwi in swiFiles:
         # Look at the installed swi last
         swiFiles.remove( installedSwi )
         if saveBootImage:
            bt5( 'Save installed SWI', v( swiFiles ) )
         else:
            swiFiles.append( installedSwi )
            bt5( 'Installed SWI appended to the end', v( swiFiles ) )
      for swi in swiFiles:
         fileSize = os.stat( swi ).st_size
         bt5( v( f'Cleaning up swi: {swi}, file size: {fileSize}' ) )
         os.remove( swi )
         freeSpace += fileSize
         bt5( 'New free space', v( freeSpace ) )
         if freeSpaceNeeded <= freeSpace:
            # Flash has enough free space now
            bt5( 'Freed enough space' )
            break
      else:
         # Ran out of images to delete (or there were none) without reaching
         # the target
         Logging.log( ZtnLogs.DMF_CONTROLLER_FLASH_FULL )
         return False
      return True

   def resetZtnStatus( self ):
      """Clear the saved manifest, sync state, sync stages and last error."""
      bt5()
      self.status.manifest = ''
      self.status.manifestTs = 0
      self.status.syncState = State.none
      self.status.syncStages.reset()
      self.status.lastKnownError = ''

   def doCleanup( self, resetState=True ):
      """Drop per-cycle state and stop the manifest retry timer.

      With resetState (the default) the ZTN status is reset and the
      transaction manager is discarded as well; doResync() passes False to
      keep both.
      """
      bt5()
      if resetState:
         self.resetZtnStatus()
         self.transactionMgr = None
      self.httpRequests.clear()
      self.primaryController = None
      # Tac.endOfTime parks the timer, as in __init__
      self.manifestRetryTimer.timeMin = Tac.endOfTime

class ConfigMonitor:
   '''Reacts to configuration and hardware-MIB changes on behalf of the
   ZTN config syncer.

   Reactors are installed on the ZTN config, the DMF config and the hardware
   entity MIB; their handlers call into the ztnConfigSyncer passed to the
   constructor (doInit, doCleanup, doResync, doAddController,
   doDelController).
   '''

   def __init__( self, config, dmfConfig, status, hwEntMib,
         ztnConfigSyncer ):
      self.config = config
      self.dmfConfig = dmfConfig
      self.status = status
      self.hwEntMib = hwEntMib
      self.ztnConfigSyncer = ztnConfigSyncer
      # Installed by handleHwInit() the first time hwEntMib.root is set.
      # NOTE(review): hwMibReactor below is created without callBackNow, so
      # if root is already populated at construction time this presumably
      # stays None until systemMacAddr or root next changes -- confirm that
      # this is intended.
      self.hwInitReactor = None
      self.hwMibReactor = GenericReactor(
            self.hwEntMib, [ "systemMacAddr", "root" ], self.handleHwInit )
      self.controllerReactor = GenericReactor(
            self.dmfConfig, [ 'controller' ], self.handleController )
      self.forceSyncReactor = GenericReactor(
            self.config, [ 'forceSync' ], self.handleForceSync )
      self.srcIntfReactor = GenericReactor(
            self.config, [ 'sourceIntf' ], self.handleSrcIntf )
      # The two reactors below are created with callBackNow=True, so they
      # are kept last: every attribute their handlers read is already set.
      self.enabledReactor = GenericReactor(
            self.dmfConfig, [ 'enabled' ], self.handleEnabled,
            callBackNow=True )
      self.handshakeReactor = GenericReactor(
            self.config, [ 'bypassHandshake' ], self.handleHandshakeBypass,
            callBackNow=True )

   def isZtnReady( self ):
      '''Return True when DMF is enabled, the system MAC address is non-zero
      and the entity MIB root exists with a non-empty modelName.'''
      zeroMac = Tac.Type( "Arnet::EthAddr" ).ethAddrZero
      mib = self.hwEntMib
      ready = False
      if self.dmfConfig.enabled and mib.systemMacAddr != zeroMac:
         root = mib.root
         ready = bool( root and root.modelName )
      bt5( v( ready ) )
      return ready

   def handleReady( self ):
      '''Start the syncer and replay every configured controller when ZTN is
      ready; otherwise ask the syncer to clean up.'''
      if not self.isZtnReady():
         self.ztnConfigSyncer.doCleanup()
         return
      self.ztnConfigSyncer.doInit()
      for controllerIp in self.dmfConfig.controller:
         self.handleController( key=controllerIp )

   def handleEnabled( self, notifiee=None ):
      '''Copy dmfConfig.enabled into status.enabled and re-evaluate
      readiness.'''
      enabled = self.dmfConfig.enabled
      bt5( v( enabled ) )
      self.status.enabled = enabled
      self.handleReady()

   def handleHwInit( self, notifiee=None ):
      '''Handle a change of systemMacAddr/root (or of root.modelName).

      Once the MIB root exists, a reactor on its modelName is installed
      (only once) that calls back into this same handler.
      '''
      mib = self.hwEntMib
      bt5( v( mib.systemMacAddr ) )
      root = mib.root
      if root and not self.hwInitReactor:
         self.hwInitReactor = GenericReactor(
               root, [ "modelName" ], self.handleHwInit )
      self.handleReady()

   def handleController( self, notifiee=None, key=None ):
      '''Add or delete the controller identified by key, depending on
      whether it is currently present in dmfConfig.controller. An empty key
      is ignored.'''
      controllerIp = key
      bt5( v( controllerIp ) )
      if not controllerIp:
         return
      syncer = self.ztnConfigSyncer
      update = ( syncer.doAddController
                 if controllerIp in self.dmfConfig.controller
                 else syncer.doDelController )
      update( controllerIp )

   def handleForceSync( self, notifiee=None ):
      '''Ask the syncer to resync, but only while ZTN is ready.'''
      bt5()
      if not self.isZtnReady():
         return
      self.ztnConfigSyncer.doResync()

   def handleSrcIntf( self, notifiee=None ):
      '''Used only for testing.

      When a transaction manager exists and its interface differs from the
      kernel name of the configured source interface, the syncer is cleaned
      up and readiness is re-evaluated.
      '''
      bt5( v( self.config.sourceIntf ) )
      kernelIntf = DeviceNameLib.eosIntfToKernelIntf( self.config.sourceIntf )
      txnMgr = self.ztnConfigSyncer.transactionMgr
      if not txnMgr:
         return
      if txnMgr.interface != kernelIntf:
         self.ztnConfigSyncer.doCleanup()
         self.handleReady()

   def handleHandshakeBypass( self, notifiee=None ):
      '''Used only for testing.

      Marks the handshake as complete in status when config.bypassHandshake
      is set; does nothing otherwise.
      '''
      if not self.config.bypassHandshake:
         return
      t8( "bypassHandshake =", self.config.bypassHandshake )
      self.status.handshakeComplete = True

class ZtnAgent( Agent.Agent ):
   def __init__( self, entityMgr, **kwargs ):
      '''Create the agent under the name returned by name() and set up
      BothTrace.

      entityMgr: passed through to Agent.Agent.__init__.
      kwargs: accepted but not used here.
      '''
      Agent.Agent.__init__( self, entityMgr, agentName=name() )
      # The "-%d" suffix is only added when QUICKTRACEDIR is not set in the
      # environment (presumably expanded by the tracing library -- confirm).
      qtfile = "{}{}.qt".format( self.agentName, "-%d" if "QUICKTRACEDIR"
                                 not in os.environ else "" )
      BothTrace.initialize( qtfile, "8,1024,8,8,8,1024,8,8,1024,8",
                            maxStringLen=80 )
      bt1()
      # doInit() stores the syncer as self.configSyncer once the mounts
      # complete; define it here so that reading it earlier yields None
      # instead of raising AttributeError.
      self.configSyncer = None
      # Older name, never assigned after construction. Kept so that any
      # existing reader of this attribute keeps working.
      self.ztnConfigSyncer = None
      self.configMonitor = None
      self.logger = None

   def doInit( self, entityManager ):
      '''Mount the entities the agent needs and, once the mounts complete,
      build the logger, the config syncer and the config monitor.

      entityManager: provides the mount group used for all mounts.
      '''
      # pylint: disable-msg=attribute-defined-outside-init
      mountGroup = entityManager.mountGroup()
      self.config = mountGroup.mount( "ztn/config", "Ztn::Config", "r" )
      self.status = mountGroup.mount( "ztn/status", "Ztn::Status", "w" )
      self.dmfConfig = mountGroup.mount(
            "dmf/cli/config", "Dmf::Cli::Config", "r" )
      self.hwEntMib = mountGroup.mount(
            "hardware/entmib", "EntityMib::Status", "r" )
      qosSlicePath = f"cell/{ Cell.cellId() }/qos/hardware/status/slice"
      self.qosSliceHwStatus = mountGroup.mount(
            qosSlicePath, "Tac::Dir", "ri" )

      def _mountsComplete():
         # Passed to mountGroup.close() below as its callback.
         bt5()
         Logging.log( ZtnLogs.DMF_CONTROLLER_HANDSHAKE_INIT )
         ztnLogger = ZtnLogger.ZtnLogger( self.status )
         self.logger = ztnLogger
         ztnLogger.addMsg( "ZTN init" )
         self.generateBootSwiDigest( self.status )
         syncer = ZtnConfigSyncer(
               self.hwEntMib, self.config, self.status, self.logger,
               self.entityManager.sysname(), self.qosSliceHwStatus )
         self.configSyncer = syncer
         self.configMonitor = ConfigMonitor(
               self.config, self.dmfConfig, self.status, self.hwEntMib,
               syncer )

      mountGroup.close( _mountsComplete )

   def generateBootSwiDigest( self, status ):
      '''Compute the SHA-256 digest of the booted SWI image and record it.

      status: object whose bootSwiDigest collection receives the digest
         under the HashAlgo.sha256 key. When None, self.status is used.

      An OSError while computing the digest (or inside the try block) is
      traced at level 0 and otherwise ignored; no digest is recorded then.
      '''
      bt5( 'Saving sha256 digest for the booted SWI' )
      # Honour the status argument; it used to be accepted but ignored in
      # favour of self.status.
      if status is None:
         status = self.status
      swiPath = getRealPath( BOOTED_IMAGE_SWI_PATH )
      try:
         shasum = computeSha256Hash( swiPath )
         bt1( v( shasum ) )
         status.bootSwiDigest[ HashAlgo.sha256 ] = shasum
      except OSError as e:
         bt0( "Computing boot swi digest failed:", v( e ) )
