# Copyright (c) 2023 Arista Networks, Inc.  All rights reserved.
# Arista Networks, Inc. Confidential and Proprietary.

# pylint: disable=consider-using-f-string

from ContainerMgrCommon import runCmd
import Logging
import QuickTrace
import Tac
import Tracing
import Url
import os
import shlex
import sys

# Tracing handle for this module. The t<N> aliases below are its per-level
# trace functions.
traceHandle = Tracing.Handle( "ContainerMgrContainerSm" )

t0 = traceHandle.trace0 # function calls
t1 = traceHandle.trace1 # error/exception; also used here for state progress
t2 = traceHandle.trace2 # login/logout from a registry
                        # NOTE(review): not used in the visible code - confirm
t3 = traceHandle.trace3 # container info traces
                        # NOTE(review): not used in the visible code - confirm

# QuickTrace shorthands. In this file runtime values handed to the qt<N>
# functions are wrapped with qv().
qv = QuickTrace.Var
qt0 = QuickTrace.trace0 # Important function calls
qt1 = QuickTrace.trace1 # error/exception
qt2 = QuickTrace.trace2 # Other important info

# Tac types describing the container run state and its configured parameters.
ContainerRunState = Tac.Type( "ContainerMgr::ContainerRunState" )
ContainerParams = Tac.Type( "ContainerMgr::ContainerParams" )

# Tac types for the individual container parameters. runContainer() compares
# some of them against their `invalid` / `...Unconfigured` members to decide
# whether a value was configured.
CpuShares = Tac.Type( 'ContainerMgr::CpuShares' )
CpuCores = Tac.Type( 'ContainerMgr::CpuCores' )
MemoryHardLimit = Tac.Type( 'ContainerMgr::MemoryHardLimit' )
MemorySoftLimit = Tac.Type( 'ContainerMgr::MemorySoftLimit' )
SecurityMode = Tac.Type( 'ContainerMgr::SecurityMode' )
NetworkingMode = Tac.Type( 'ContainerMgr::NetworkingMode' )
LoggingDriver = Tac.Type( 'ContainerMgr::LoggingDriver' )
RestartPolicy = Tac.Type( 'ContainerMgr::RestartPolicy' )
StartupConditionPollInterval = \
      Tac.Type( 'ContainerMgr::StartupConditionPollInterval' )

# Directory into which multi-line startup-condition scripts are written before
# they are executed (see ContainerConfigReactor.evaluateStartupCondition).
startupConditionScriptDir = "/run/ContainerMgrStartupCondition"

def startupConditionScriptTimeout():
   '''Return the timeout applied to a startup-condition command, as an int.

   The value is taken from the CMGR_TEST_STARTUP_CONDITION_TIMEOUT environment
   variable when it is set, and is 15 otherwise. A value that is not a valid
   integer makes int() raise ValueError.
   '''
   override = os.environ.get( 'CMGR_TEST_STARTUP_CONDITION_TIMEOUT' )
   if override is None:
      override = '15'
   return int( override )

# Log message handles used by ContainerConfigReactor.
# Each name is first bound to None and then rebound to its Logging.LogHandle.
# NOTE(review): the None pre-assignment looks like a tooling/lint convention -
# confirm before removing it.

# Logged after a container has been started successfully; takes the container
# name.
CONTAINERMGR_CONTAINER_STARTED = None
CONTAINERMGR_CONTAINER_STARTED = Logging.LogHandle(
      "CONTAINERMGR_CONTAINER_STARTED",
      severity=Logging.logInfo,
      fmt='Container %s started.',
      explanation='Configured container started successfully.',
      recommendedAction=Logging.NO_ACTION_REQUIRED )

# Logged by removeContainer(); takes the container name.
CONTAINERMGR_CONTAINER_REMOVED = None
CONTAINERMGR_CONTAINER_REMOVED = Logging.LogHandle(
      "CONTAINERMGR_CONTAINER_REMOVED",
      severity=Logging.logInfo,
      fmt='Container %s stopped and removed on configuration change.',
      explanation=( 'Running container was stopped and removed '
                    'due to configuration change.' ),
      recommendedAction=Logging.NO_ACTION_REQUIRED )

# Logged when the run command fails; takes the container name, the reported
# error text and the command line that was run.
CONTAINERMGR_CONTAINER_START_FAILED = None
CONTAINERMGR_CONTAINER_START_FAILED = Logging.LogHandle(
      "CONTAINERMGR_CONTAINER_START_FAILED",
      severity=Logging.logError,
      fmt='Container %s failed to run, '
          'container-runtime reported error "%s" while running "%s".',
      explanation="Configured container didn't start successfully.",
      recommendedAction="If the problem persists, "
                        "please check container configuration."
                        " Otherwise, no action required." )

# Logged when the startup-condition command exceeds its timeout; takes the
# container name and the timeout value.
CONTAINERMGR_CONTAINER_STARTUP_CONDITION_BASH_TIMEOUT = None
CONTAINERMGR_CONTAINER_STARTUP_CONDITION_BASH_TIMEOUT = Logging.LogHandle(
      "CONTAINERMGR_CONTAINER_STARTUP_CONDITION_BASH_TIMEOUT",
      severity=Logging.logError,
      fmt='Startup condition bash commands for Container %s timed out after'
          ' %d seconds',
      explanation="Startup condition bash commands didn't exit immediately"
                  " as expected",
      recommendedAction="If the problem persists, "
                        "please check the startup condition bash configuration."
                        " Otherwise, no action required." )

class ContainerConfigReactor( Tac.Notifiee ):
   notifierTypeName = "ContainerMgr::ContainerConfig"

   def __init__( self, notifier, master ):
      '''Set up the reactor for one container config and process it once.

      notifier: the ContainerMgr::ContainerConfig entity being watched.
      master: owner object; this class uses its daemonStarted() method and its
         containerStatusDir attribute.
      '''
      Tac.Notifiee.__init__( self, notifier )
      self.master_ = master
      self.containerConfig = notifier

      # Timer driving retries after a failed start. It is parked at
      # Tac.endOfTime until attemptToScheduleFailureRetry() arms it.
      self.failureRetryNotifiee_ = Tac.ClockNotifiee()
      self.failureRetryNotifiee_.handler = self.handleFailureRetry
      self.failureRetryNotifiee_.timeMin = Tac.endOfTime
      # Added to Tac.now() when a retry is scheduled (presumably seconds -
      # TODO confirm).
      self.failureRetryInterval = 30

      # Timer driving re-evaluation of the startup condition; parked at
      # Tac.endOfTime until maybeStartContainer_() arms it.
      self.startupConditionPollNotifiee_ = Tac.ClockNotifiee()
      self.startupConditionPollNotifiee_.handler = self.handleStartupConditionPoll
      self.startupConditionPollNotifiee_.timeMin = Tac.endOfTime
      # Number of poll-timer firings since the poll state was last cleared.
      self.startupConditionPollCounter = 0

      # Status entity for this container, keyed by the config's name.
      self.containerStatusDir = master.containerStatusDir
      self.containerStatus = \
            self.containerStatusDir.newContainer( self.containerConfig.name )

      # Act on the configuration that already exists at construction time.
      self.handleContainerConfig()

   @staticmethod
   def isContainerRunning( name ):
      '''Return True if `docker ps -q` prints anything for the given name.

      stderr of the docker command is forwarded to this process's stderr.
      '''
      # NOTE(review): docker's `name=` filter matches on all or part of a
      # container name, so this may also report True for a different container
      # whose name merely contains `name` - confirm whether anchoring is needed.
      nameFilter = 'name=%s' % name
      listing = Tac.run( [ 'docker', 'ps', '-q', '-f', nameFilter ],
                         stdout=Tac.CAPTURE, stderr=sys.stderr )
      return bool( listing )

   @staticmethod
   def isContainerCreated( name ):
      '''Return True if `docker ps -a -q` prints anything for the given name.

      The `-a` flag is passed so that containers which are not running are
      listed as well.
      '''
      # NOTE(review): docker's `name=` filter matches on all or part of a
      # container name - confirm whether an anchored match is needed here.
      nameFilter = 'name=%s' % name
      listing = Tac.run( [ 'docker', 'ps', '-a', '-q', '-f', nameFilter ],
                         stdout=Tac.CAPTURE )
      return bool( listing )

   @staticmethod
   def stopContainer( name, assertFail=False ):
      '''Stop the named container if it is reported as running.

      Returns False when there was nothing to stop, otherwise the failure
      indication returned by runCmd. With assertFail set, a failed
      `docker stop` trips an assertion instead of being returned.
      '''
      running = ContainerConfigReactor.isContainerRunning( name )
      if running:
         # `docker stop` may block for about 10 seconds when the container
         # does not react to SIGTERM.
         stopFailed, _ = runCmd( [ "docker", "stop", name ],
                                 "Stopping container %s" % name )
         if assertFail:
            assert not stopFailed, "docker stop failed"
         return stopFailed

      # Not running: nothing to do.
      return False

   @staticmethod
   def removeContainer( name, force=False, assertFail=False ):
      '''Remove the named container if docker reports that it exists.

      name: container name.
      force: pass `-f` to `docker rm`.
      assertFail: trip an assertion if `docker rm` fails.

      Returns False when there was nothing to remove, otherwise the failure
      indication returned by runCmd. CONTAINERMGR_CONTAINER_REMOVED is logged
      only when the removal actually succeeded.
      '''
      if not ContainerConfigReactor.isContainerCreated( name ):
         # nothing to do
         return False

      removeCmd = [ 'docker', 'rm' ]
      if force:
         removeCmd.append( '-f' )
      removeCmd.append( name )
      hasFailed, _ = runCmd( removeCmd,
                             "Removing container %s" %
                             name )
      if assertFail:
         assert not hasFailed, "docker rm failed"
      # Do not claim the container was removed when `docker rm` failed.
      if not hasFailed:
         Logging.log( CONTAINERMGR_CONTAINER_REMOVED,
                      name )
      return hasFailed

   def evaluateStartupCondition( self ):
      '''Evaluate the container's configured startup condition, if any.

      A condition without newlines is handed inline to `/usr/bin/sh -c`. A
      condition containing newlines is first written to
      <startupConditionScriptDir>/<container name>.sh and that script path is
      run instead. The command is run with asRoot=True and with the timeout
      from startupConditionScriptTimeout().

      Returns True when no startup condition is configured, or when the
      command completed without Tac.run raising. Returns False when Tac.run
      raised Tac.SystemCommandError or Tac.Timeout; a timeout is additionally
      logged via CONTAINERMGR_CONTAINER_STARTUP_CONDITION_BASH_TIMEOUT.
      '''
      containerName = self.containerConfig.name
      startupConditionBashCmd = self.containerConfig.params.startupConditionBash
      if startupConditionBashCmd == "":
         # No startup condition configured
         return True

      if '\n' in startupConditionBashCmd:
         startupConditionScriptFilePath = os.path.join(
               startupConditionScriptDir, f"{containerName}.sh" )
         # exist_ok avoids the check-then-create race of exists() + mkdir().
         os.makedirs( startupConditionScriptDir, exist_ok=True )
         with open( startupConditionScriptFilePath, "w" ) as f:
            f.write( startupConditionBashCmd )
         # The script is executed with asRoot=True below, so it must not be
         # writable by group/other: owner rwx, everybody else r-x.
         os.chmod( startupConditionScriptFilePath, 0o755 )
         shCmd = startupConditionScriptFilePath
         t1( "Container:", containerName,
             "Evaluating startup condition by running script", shCmd )
         qt1( "Container:", qv( containerName ),
              "Evaluating startup condition by running script", qv( shCmd ) )
      else:
         shCmd = startupConditionBashCmd
         t1( "Container:", containerName,
             "Evaluating startup condition by running inline shell command" )
         qt1( "Container:", qv( containerName ),
              "Evaluating startup condition by running inline shell command" )

      if 'CMGR_DEBUG_STARTUP_CONDITION' in os.environ:
         # Dump shell logs to agent log for debugging
         # TODO: Instead of this have separate log file /var/log/containerMgr
         scStdout = Tac.INHERIT
         scStderr = Tac.INHERIT
      else:
         scStdout = Tac.DISCARD
         scStderr = Tac.DISCARD

      env = os.environ.copy()
      # bash hits AddressSanitizer leaks for some reason
      if 'ASAN_OPTIONS' in os.environ:
         env.pop( 'LD_PRELOAD', None )
         env.pop( 'ASAN_OPTIONS' )

      # Read the timeout once so that the value enforced and the value
      # reported in the timeout log message cannot differ.
      timeout = startupConditionScriptTimeout()
      try:
         Tac.run( [ "/usr/bin/sh", "-c", shCmd ],
                  stdout=scStdout, stderr=scStderr,
                  asRoot=True,
                  env=env,
                  timeout=timeout )
      except Tac.SystemCommandError:
         t1( "Container:", containerName,
             "Startup condition evaluated as false" )
         qt1( "Container:", qv( containerName ),
              "Startup condition evaluated as false" )
         return False
      except Tac.Timeout:
         t1( "Container:", containerName,
             "Timed out while evaluating startup condition" )
         qt1( "Container:", qv( containerName ),
              "Timed out while evaluating startup condition" )
         Logging.log( CONTAINERMGR_CONTAINER_STARTUP_CONDITION_BASH_TIMEOUT,
                      containerName,
                      timeout )
         return False

      # Startup condition is evaluated to true only if the bash commands
      # exit with EXIT_SUCCESS.
      t1( "Container:", containerName,
          "Startup condition evaluated as true" )
      qt1( "Container:", qv( containerName ),
           "Startup condition evaluated as true" )
      return True

   def volumeMountOptions( self ):
      '''Build the `-v host:container` arguments for the configured bind mounts.

      Mounts are emitted in sorted order of their container path. Each
      configured host path is passed through Url.parseUrl() and its
      localFilename() is used as the host side of the mount.
      '''
      bindMounts = self.containerConfig.bindMount
      options = []
      for ctrPath in sorted( bindMounts.keys() ):
         hostUrl = Url.parseUrl( bindMounts[ ctrPath ].hostPath, None )
         hostPath = hostUrl.localFilename()
         options += [ '-v', f"{hostPath}:{ctrPath}" ]
      return options

   def envVarOptions( self ):
      envEntries = list() # pylint: disable=use-list-literal
      environmentConfig = self.containerConfig.params.environment
      if environmentConfig != "":
         envEntries = environmentConfig.split( ':' )

      envVarOptions = list() # pylint: disable=use-list-literal
      for entry in envEntries:
         envVarOptions.extend( [ '-e', entry ] )
      return envVarOptions

   def runContainer( self ):
      '''Assemble and execute the `docker run` command for this container.

      The command is built from the configured params, followed by the
      environment and bind-mount options, the image name and the optional
      container command.

      Returns ( hasFailed, cmdStr, errText ): the failure indication and error
      text from runCmd, and the command line joined with spaces.
      '''
      params = self.containerConfig.params
      assert params.imageName != ""

      name = self.containerConfig.name
      runArgs = [ '--name', name, '--detach' ]

      # CPU shares: fall back to the type's default when not configured.
      if params.cpuShares != CpuShares.invalid:
         cpuShares = params.cpuShares
      else:
         cpuShares = CpuShares.defaultValue
      runArgs.extend( [ '--cpu-shares', str( cpuShares ) ] )

      # CPU cores: the option is omitted when the resulting value is falsy.
      if params.cpuCores != CpuCores.invalid:
         cpuCores = params.cpuCores
      else:
         cpuCores = CpuCores.defaultValue
      if cpuCores:
         runArgs.extend( [ '--cpuset-cpus', cpuCores ] )

      # Hard memory limit is always passed; soft limit only when configured.
      if params.memoryHardLimit != MemoryHardLimit.invalid:
         memoryHardLimit = params.memoryHardLimit
      else:
         memoryHardLimit = MemoryHardLimit.defaultValue
      runArgs.extend( [ '--memory', memoryHardLimit ] )

      if params.memorySoftLimit != MemorySoftLimit.invalid:
         runArgs.extend( [ '--memory-reservation', params.memorySoftLimit ] )

      # Restart policy: unconfigured maps to a bounded on-failure policy.
      restartPolicy = params.restartPolicy
      if restartPolicy == RestartPolicy.restartPolicyUnconfigured:
         restartArg = 'on-failure:10'
      elif restartPolicy == RestartPolicy.restartPolicyAlways:
         restartArg = 'always'
      else:
         assert restartPolicy == RestartPolicy.restartPolicyNoRestart
         restartArg = 'no'
      runArgs.extend( [ '--restart', restartArg ] )

      if params.secMode != SecurityMode.secModeUnconfigured:
         assert params.secMode == SecurityMode.secModePrivileged
         runArgs.append( '--privileged' )

      if params.netMode != NetworkingMode.netModeUnconfigured:
         assert params.netMode == NetworkingMode.netModeHost
         runArgs.extend( [ '--network', 'host' ] )

      if params.loggingDriver != LoggingDriver.driverUnconfigured:
         assert params.loggingDriver == LoggingDriver.driverSyslog
         runArgs.extend( [ '--log-driver', 'syslog',
                           '--log-opt', 'syslog-facility=local4',
                           '--log-opt', 'tag="{{.Name}}"' ] )

      # NOTE(review): free-form options are split on whitespace only, while
      # the container command below goes through shlex.split() - confirm that
      # quoted option values are not expected to work.
      if params.options:
         runArgs.extend( params.options.split() )
      trailingCmd = []
      if params.command:
         trailingCmd = shlex.split( params.command )

      runArgs.extend( self.envVarOptions() )
      runArgs.extend( self.volumeMountOptions() )

      # HOME is removed from the environment of the docker invocation.
      cmd = ( [ 'env', '-u', 'HOME', 'docker', 'run' ] + runArgs +
              [ params.imageName ] + trailingCmd )

      description = "Running container %s with cmd %s" % ( name, cmd )
      hasFailed, errText = runCmd( cmd, description )
      return hasFailed, " ".join( cmd ), errText

   def attemptToScheduleFailureRetry( self ):
      '''Schedule another start attempt if the retry budget is not used up.

      While containerStatus.failureRetryCounter is below
      containerStatus.maxFailureRetryCount, the counter is incremented and the
      failure-retry timer is armed at Tac.now() + failureRetryInterval.
      Otherwise the timer is parked at Tac.endOfTime.

      Returns True if a retry was scheduled, False if retries are exhausted.
      '''
      containerName = self.containerConfig.name
      if ( self.containerStatus.failureRetryCounter <
           self.containerStatus.maxFailureRetryCount ):
         self.containerStatus.failureRetryCounter += 1

         t1( "Scheduling failure retry %d for container %s" %
             ( self.containerStatus.failureRetryCounter,
               containerName ) )
         # The container name is a runtime value, so it is wrapped in qv()
         # like in every other QuickTrace call in this file.
         qt1( "Scheduling failure retry",
              qv( self.containerStatus.failureRetryCounter ),
              "for container", qv( containerName ) )
         self.failureRetryNotifiee_.timeMin = Tac.now() + self.failureRetryInterval
         return True

      t1( "Failure Retries exhausted for container %s, marking failed" %
          containerName )
      qt1( "Failure Retries exhausted for container", qv( containerName ),
           ", marking failed" )
      self.failureRetryNotifiee_.timeMin = Tac.endOfTime
      return False

   def clearFailureRetryState( self ):
      '''Reset the failure-retry counter and park the failure-retry timer.'''
      status = self.containerStatus
      retryTimer = self.failureRetryNotifiee_
      status.failureRetryCounter = 0
      retryTimer.timeMin = Tac.endOfTime

   def clearStartupConditionPollState( self ):
      '''Reset the startup-condition poll counter and park the poll timer.'''
      containerName = self.containerConfig.name
      t1( "Container:", containerName, "clearStartupConditionPollState" )
      qt1( "Container:", qv( containerName ), "clearStartupConditionPollState" )

      pollTimer = self.startupConditionPollNotifiee_
      self.startupConditionPollCounter = 0
      pollTimer.timeMin = Tac.endOfTime

   def isStatusUpToDate( self ):
      paramsUpToDate = ( self.containerConfig.params ==
                         self.containerStatus.params )
      if not paramsUpToDate:
         return False

      enabledUpToDate = ( self.containerConfig.enabled ==
                          self.containerStatus.enabled )
      if not enabledUpToDate:
         return False

      # See if bind mounts are up-to-date
      if ( len( self.containerConfig.bindMount ) !=
           len( self.containerStatus.bindMount ) ):
         return False
      for ctrPath, bindMountEntry in self.containerConfig.bindMount.items():
         if ctrPath not in self.containerStatus.bindMount:
            return False
         if bindMountEntry != self.containerStatus.bindMount[ ctrPath ]:
            return False

      # Reaching here means everything is up-to-date
      return True

   def maybeStartContainer_( self,
                             skipCleanup,
                             skipRunnabilityCheck ):
      '''Start the container if it is enabled, has an image and is ready.

      skipCleanup: when False, any existing container of the same name is
         stopped and removed and the status (enabled, params, bindMount) is
         reset before anything else happens.
      skipRunnabilityCheck: when True, the startup condition is not evaluated
         and the container is treated as ready to start.

      The resulting run state written to containerStatus.state is one of:
         ctrStateInactive - config disabled or no image name configured
         ctrStatePollStartupCondition - startup condition not met; the poll
            timer has been armed
         ctrStateSuccess - the run command succeeded
         ctrStateFailureRetry - the run command failed, a retry is scheduled
         ctrStateFailed - the run command failed, no retries left
      containerStatus.errText and containerStatus.implCmd are written as well
      (empty strings unless a run was attempted).
      '''
      t1( "maybeStartContainer_ called. Container is %s" %
          self.containerConfig.name )
      qt2( "maybeStartContainer_ called. Container is",
           qv( self.containerConfig.name ) )

      if not skipCleanup:
         # Cleanup any existing container by same name
         ContainerConfigReactor.stopContainer( self.containerConfig.name )
         ContainerConfigReactor.removeContainer( self.containerConfig.name )

         # Cleanup status
         self.containerStatus.enabled = False
         self.containerStatus.params = ContainerParams()
         self.containerStatus.bindMount.clear()

      containerState = None
      # A container is only eligible to run when it is enabled and an image
      # has been configured.
      enableContainer = ( self.containerConfig.enabled and
                          self.containerConfig.params.imageName != "" )

      containerReadyForStartup = False
      if enableContainer:
         if skipRunnabilityCheck:
            containerReadyForStartup = True
         else:
            containerReadyForStartup = self.evaluateStartupCondition()

         if containerReadyForStartup:
            t1( "Container:", self.containerConfig.name,
                "Stopping polling for startup condition" )
            qt1( "Container:", qv( self.containerConfig.name ),
                "Stopping polling for startup condition" )
            self.clearStartupConditionPollState()
         else:
            t1( "Container:", self.containerConfig.name,
                "Scheduling next poll for startup condition" )
            qt1( "Container:", qv( self.containerConfig.name ),
                "Scheduling next poll for startup condition" )
            containerState = ContainerRunState.ctrStatePollStartupCondition
            # Arm the poll timer using the configured interval, or the type's
            # default as given by valueOrDefault().
            pollInterval = StartupConditionPollInterval(
                  self.containerConfig.params.startupConditionPollInterval )
            pollIntervalValue = pollInterval.valueOrDefault()
            self.startupConditionPollNotifiee_.timeMin = (
                  Tac.now() + pollIntervalValue )
      else:
         containerState = ContainerRunState.ctrStateInactive

      cmdStr = ""
      errText = ""
      if containerReadyForStartup:
         retryScheduled = False
         hasFailed, cmdStr, errText = self.runContainer()
         if hasFailed:
            Logging.log( CONTAINERMGR_CONTAINER_START_FAILED,
                         self.containerConfig.name,
                         errText,
                         cmdStr )
            retryScheduled = self.attemptToScheduleFailureRetry()
         else:
            # Record what was actually started so that isStatusUpToDate() can
            # later detect configuration changes.
            self.containerStatus.params = self.containerConfig.params
            self.containerStatus.enabled = True
            for bindMountEntry in self.containerConfig.bindMount.values():
               self.containerStatus.addBindMount( bindMountEntry )
            self.clearFailureRetryState()
            Logging.log( CONTAINERMGR_CONTAINER_STARTED,
                         self.containerConfig.name )

         containerState = (
               ContainerRunState.ctrStateSuccess if not hasFailed
               else ContainerRunState.ctrStateFailureRetry if retryScheduled
               else ContainerRunState.ctrStateFailed )

      # Publish the outcome; these are written on every call.
      self.containerStatus.state = containerState
      self.containerStatus.errText = errText
      self.containerStatus.implCmd = cmdStr

   def handleContainerConfig( self ):
      '''React to a (possible) change of the container configuration.

      Does nothing while the master reports that the daemon has not started,
      or when the status already matches the config. Otherwise both timers'
      state is cleared and the container is (re)started from scratch, with
      cleanup and with the startup condition evaluated.
      '''
      containerName = self.containerConfig.name
      t1( "handleContainerConfig called. Container is %s" % containerName )
      qt2( "handleContainerConfig called. Container is", qv( containerName ) )

      daemonUp = self.master_.daemonStarted()
      if not daemonUp:
         t1( "handleContainerConfig: skip" )
         qt2( "handleContainerConfig: skip" )
         return

      # Unchanged configuration: leave the running container alone.
      if self.isStatusUpToDate():
         t1( "handleContainerConfig: Nothing to do" )
         qt2( "handleContainerConfig: Nothing to do" )
         return

      self.clearStartupConditionPollState()
      self.clearFailureRetryState()
      self.maybeStartContainer_( skipRunnabilityCheck=False,
                                 skipCleanup=False )

   def handleFailureRetry( self ):
      '''Failure-retry timer callback: try to start the container again.

      The container is cleaned up first and the startup condition is not
      re-evaluated. Asserts that the status is in ctrStateFailureRetry.
      '''
      containerName = self.containerConfig.name
      t1( "Container:", containerName, "handleFailureRetry: called" )
      qt2( "Container:", qv( containerName ), "handleFailureRetry: called" )

      currentState = self.containerStatus.state
      assert currentState == ContainerRunState.ctrStateFailureRetry
      self.maybeStartContainer_( skipRunnabilityCheck=True,
                                 skipCleanup=False )

   def handleStartupConditionPoll( self ):
      '''Poll timer callback: re-evaluate the startup condition.

      Asserts that the status is in ctrStatePollStartupCondition, bumps the
      poll counter and retries the start without cleaning up first.
      '''
      containerName = self.containerConfig.name
      t1( "Container:", containerName, "handleStartupConditionPoll: called" )
      qt2( "Container:", qv( containerName ),
           "handleStartupConditionPoll: called" )

      expectedState = ContainerRunState.ctrStatePollStartupCondition
      assert self.containerStatus.state == expectedState, (
             f"Container: {self.containerConfig.name}: handleStartupConditionPoll: "
             f"Unexpected state: {self.containerStatus.state}" )

      self.startupConditionPollCounter += 1
      self.maybeStartContainer_( skipRunnabilityCheck=False,
                                 skipCleanup=True )

   @Tac.handler( 'params' )
   def handleParams( self ):
      '''Handler registered for 'params': re-process the container config.'''
      t0( "handleParams called" )
      self.handleContainerConfig()

   @Tac.handler( 'enabledState' )
   def handleEnabled( self ):
      '''Handler registered for 'enabledState': re-process the container config.'''
      t0( "handleEnabled called" )
      self.handleContainerConfig()

   @Tac.handler( 'bindMount' )
   def handleBindMount( self, ctrPath ):
      '''Handler registered for 'bindMount': re-process the container config.

      ctrPath: key of the bind-mount entry that changed; only traced here.
      '''
      # The trace functions in this file are called with positional pieces,
      # not printf-style format arguments, so the value is passed as its own
      # argument instead of through a "%s" placeholder.
      t0( "handleBindMount called ctrPath:", ctrPath )
      self.handleContainerConfig()

   def cleanup( self ):
      '''Tear down the container and drop its status entry.

      The container is stopped and force-removed only if the master reports
      that the daemon has started. Timer state is cleared and the status entry
      is deleted from the status directory in either case.
      '''
      name = self.containerConfig.name
      daemonUp = self.master_.daemonStarted()
      if daemonUp:
         ContainerConfigReactor.stopContainer( name )
         ContainerConfigReactor.removeContainer( name, force=True )

      self.clearFailureRetryState()
      self.clearStartupConditionPollState()

      statusCollection = self.containerStatusDir.container
      del statusCollection[ name ]

   def close( self ):
      '''Clean up (at most once) and close the underlying notifiee.'''
      t0( "close for ContainerConfigReactor called" )

      # cleanup() deletes the status entry, so its presence tells us whether
      # cleanup still has to run; this guards against repeated close() calls.
      name = self.containerConfig.name
      statusStillPresent = name in self.containerStatusDir.container
      if statusStillPresent:
         self.cleanup()

      Tac.Notifiee.close( self )
