# Copyright (c) 2018 Arista Networks, Inc.  All rights reserved.
# Arista Networks, Inc. Confidential and Proprietary.

import Tracing
import Logging
import SuperServer
import Tac
import ArchiveLib
import SpaceMgmtLib
import CEosHelper
import collections
import os
import Cell
from Toggles.LogArchiverToggleLib import toggleDisableLowStorageLifetimeEnabled

# pylint: disable-msg=W1401
# Tracing handle for this plugin. t0 is a shorthand for the handle's trace0
# function and is what the rest of the file uses for debug traces.
traceHandle = Tracing.Handle( "LogArchiver" )
t0 = traceHandle.trace0

# Syslog message definitions used by this plugin.

# Logged by ActionQueue.currentArchive() when
# ArchiveLib.Archive.currentArchive() returns False.
EOS_ARCHIVE_NO_CONFIG_ERROR = Logging.LogHandle(
              'EOS_ARCHIVE_NO_CONFIG_ERROR',
              severity=Logging.logError,
              fmt='Unable to get the currently configured archive',
              explanation=( 'File %s does not exist.'
                            % ArchiveLib.Archive.configFilePath ),
              recommendedAction=Logging.CALL_SUPPORT_IF_PERSISTS )

# Logged by ActionQueue.currentArchive() when
# ArchiveLib.Archive.currentArchive() returns None.
EOS_ARCHIVE_INV_CONFIG_ERROR = Logging.LogHandle(
              'EOS_ARCHIVE_INV_CONFIG_ERROR',
              severity=Logging.logError,
              fmt='Unable to use the currently configured archive',
              explanation=( 'Unable to read the configuration file %s'
                            ' or its content is invalid.'
                            % ArchiveLib.Archive.configFilePath ),
              recommendedAction=Logging.CALL_SUPPORT_IF_PERSISTS )

# Logged with the caught exception as the single format argument, by
# LogArchiverConfigReactor.doQuotaPct() on QuotaCmdException and by
# LogArchiverConfigReactor.doDestSetup() on QuotaCmdException or
# Tac.SystemCommandError.
EOS_ARCHIVE_QUOTACMD_ERROR = Logging.LogHandle(
              'EOS_ARCHIVE_QUOTACMD_ERROR',
              severity=Logging.logError,
              fmt='An error occured managing linux quota: %s',
              explanation=( 'Updating quota limit failed. It could mean that the'
                            ' filesystem is mounted without quota options or that'
                            ' the quota database file aquota.user is missing or'
                            ' corrupted.' ),
              recommendedAction=Logging.CALL_SUPPORT_IF_PERSISTS )

# Logged with the caught exception as the single format argument, on
# AssertionError in ActionQueue.currentArchive() and
# LogArchiverConfigReactor.doQuotaPct(), and on OSError or AssertionError in
# LogArchiverConfigReactor.doDestSetup().
EOS_ARCHIVE_FS_ERROR = Logging.LogHandle(
              'EOS_ARCHIVE_FS_ERROR',
              severity=Logging.logError,
              fmt=( 'An error occurred getting filesystem information'
                    ' for the archive destination path: %s' ),
              explanation=( 'We are unable to get filesystem information for the'
                            ' destination path or the mountpoint of the filesystem.'
                            ' This means the path does not exist or the filesystem'
                            ' is not mounted.' ),
              recommendedAction=Logging.CALL_SUPPORT_IF_PERSISTS )

# Logged by StorageLifetimeReactor.handleStorageStatus() when the tracked
# lifetime goes from at-or-above to at-or-below ArchiveLib.DRIVE_LIFETIME_CUTOFF
# while archiveConfig.shutdown is false.
EOS_ARCHIVE_DISABLED_STORAGE_LIFETIME = Logging.LogHandle(
              'EOS_ARCHIVE_DISABLED_STORAGE_LIFETIME',
              severity=Logging.logInfo,
              fmt='LogArchiver has been disabled to prolong storage lifetime',
              explanation=( 'LogArchiver has been automatically disabled to prolong'
                            ' the lifetime of the storage device. If more than one'
                            ' storage device is present, you can re-enable'
                            ' LogArchiver by changing the logging destination using'
                            ' `management archive`.' ),
              recommendedAction=Logging.NO_ACTION_REQUIRED )

class ActionQueue:
   """
   FIFO of deferred archive actions, drained from a Tac.ClockNotifiee handler.

   Each queue entry is a dict with the keys 'func' (callable invoked with the
   archive), 'archive' (the archive fetched when the action was enqueued) and
   'try' (attempt counter, starting at 1).
   """

   def __init__( self ):
      self.queue = collections.deque()
      # Created with timeMin=endOfTime; enqueue() and flush() schedule the
      # handler by writing timeMin.
      self.clockNotifie = Tac.ClockNotifiee( timeMin=Tac.endOfTime,
                                             handler=self.flush )

   def currentArchive( self ):
      """
      Return ArchiveLib.Archive.currentArchive(), or None when it is unusable.

      A syslog is emitted for each failure: EOS_ARCHIVE_FS_ERROR when fetching
      raises AssertionError, EOS_ARCHIVE_INV_CONFIG_ERROR when the result is
      None and EOS_ARCHIVE_NO_CONFIG_ERROR when the result is False.
      """
      try:
         t0( 'retrieve current archive' )
         archive = ArchiveLib.Archive.currentArchive()
      except AssertionError as e:
         Logging.log( EOS_ARCHIVE_FS_ERROR, e )
         return None

      if archive is None:
         Logging.log( EOS_ARCHIVE_INV_CONFIG_ERROR )
         return None
      if archive is False:
         Logging.log( EOS_ARCHIVE_NO_CONFIG_ERROR )
         return None

      return archive

   def enqueue( self, func ):
      """
      Queue func to be called later as func( archive ).

      The archive is fetched now, not when the action runs. The action is
      silently dropped when no archive can be fetched (currentArchive() has
      already logged the reason).
      """
      t0( f'enqueue action for {func.__name__}' )
      archive = self.currentArchive()
      if archive is None:
         # We drop the action if the archive can not be fetched
         return

      if not self.queue:
         # Nothing pending, so no flush is scheduled yet: schedule one now.
         self.clockNotifie.timeMin = 0
      self.queue.append( { 'func' : func, 'archive' : archive, 'try' : 1 } )

   def flush( self ):
      """
      Clock notifiee handler: run queued actions in FIFO order.

      - Stops after roughly 10 seconds and reschedules itself 1 second later
        if actions remain, so that SuperServer can send its heartbeat.
      - On Tac.Timeout the action is put back at the head of the queue with
        its 'try' counter incremented and the flush is retried in 10 seconds.
      - On any other exception the failing action is dropped, the remaining
        actions are rescheduled, and the exception is re-raised unchanged.
      """
      reschedulingTime = 10
      startTime = Tac.now()

      def execTime():
         return Tac.now() - startTime

      try:
         # If we stay in this handler too long, SuperServer is going to miss a
         # heartbeat and get killed. See BUG289813.
         while self.queue and execTime() < 10:
            action = self.queue.popleft()
            t0( 'exec %s (try number %d)'
                % ( action[ 'func' ].__name__, action[ 'try' ] ) )
            action[ 'func' ]( action[ 'archive' ] )
      except Tac.Timeout:
         t0( 'timeout executing %s (try number %d)'
              % ( action[ 'func' ].__name__, action[ 'try' ] ) )
         action[ 'try' ] += 1
         self.queue.appendleft( action )
         t0( 'rescheduling clock notifie in %d secs' % reschedulingTime )
         self.clockNotifie.timeMin = Tac.now() + reschedulingTime
      except Exception:
         # enqueue() only schedules a flush when the queue is empty, so leaving
         # here with pending actions and no scheduled flush would strand them
         # forever. Reschedule before letting the error propagate.
         t0( 'unexpected error while flushing, dropping the failing action' )
         if self.queue:
            t0( 'rescheduling clock notifie in 1 secs after error' )
            self.clockNotifie.timeMin = Tac.now() + 1
         raise
      else:
         if self.queue:
            t0( 'rescheduling clock notifie in 1 secs for heartbeat' )
            self.clockNotifie.timeMin = Tac.now() + 1

class SingleAttrSubReactor( Tac.Notifiee ):
   """
   Reacts to a single property change on a type and calls the function passed in.

   This base class only stores the callback. Each subclass in this file sets its
   own notifierTypeName and declares a Tac.handler method that calls self.fn().
   """
   notifierTypeName = "*"

   def __init__( self, notifier, fn ):
      """
      notifier: the entity to watch, handed to Tac.Notifiee.
      fn: callable invoked with no arguments by the subclass handler.
      """
      super().__init__( notifier )
      self.fn = fn

# Repeat react function to avoid having to use meta classes
class DeviceSmartDataReactor( SingleAttrSubReactor ):
   """
   Calls the stored callback from the handler registered for lifetimeRemaining
   on a StorageDevices::DeviceSmartData.
   """
   notifierTypeName = "StorageDevices::DeviceSmartData"
   fieldName = "lifetimeRemaining"

   @Tac.handler( fieldName )
   def react( self ):
      """Forward the notification to the callback given at construction."""
      self.fn()

class DeviceNvmeDataReactor( SingleAttrSubReactor ):
   """
   Calls the stored callback from the handler registered for lifetimeRemaining
   on a StorageDevices::DeviceNvmeData.
   """
   notifierTypeName = "StorageDevices::DeviceNvmeData"
   fieldName = "lifetimeRemaining"

   @Tac.handler( fieldName )
   def react( self ):
      """Forward the notification to the callback given at construction."""
      self.fn()

class ConnectedDevicesReactor( SingleAttrSubReactor ):
   """
   Calls the stored callback from the handler registered for the disk attribute
   of a StorageDevices::ConnectedDevices.
   """
   notifierTypeName = "StorageDevices::ConnectedDevices"
   fieldName = "disk"

   @Tac.handler( fieldName )
   def react( self, _ ):
      """
      Forward the notification to the callback given at construction.

      The extra handler argument is ignored (presumably the key of the changed
      disk entry, since disk is indexed by drive name elsewhere in this file).
      """
      self.fn()

class ArchiveStorageDestReactor( SingleAttrSubReactor ):
   """
   Calls the stored callback from the handler registered for dest on a
   Mgmt::Archive::Config.
   """
   notifierTypeName = "Mgmt::Archive::Config"
   fieldName = "dest"

   @Tac.handler( fieldName )
   def react( self ):
      """Forward the notification to the callback given at construction."""
      self.fn()

class StorageStatusReactor( Tac.Notifiee ):
   """
   Reacts to StorageDevices::Status and registers sub handlers. Since both super
   server and storage devices start at switch boot it is possible for this file to
   run before storage devices initializes the sub structs such as connectedDevices

   Each setup* method is both called once from __init__ and registered as the
   handler for the matching attribute. A sub struct that is (still, or again)
   None leaves the corresponding reactor unset instead of being dereferenced.
   """
   notifierTypeName = "StorageDevices::Status"

   def __init__( self, storageStatus, fn ):
      """
      storageStatus: the StorageDevices::Status entity to watch.
      fn: callable with no arguments, handed to every sub reactor.
      """
      super().__init__( storageStatus )
      self.storageStatus = storageStatus
      self.fn = fn
      self.connectedDeviceReactor = None
      self.smartDataReactor = None
      self.nvmeDataReactor = None

      # Pick up whatever sub structs already exist; the None checks live in the
      # setup methods so that the handlers are protected as well.
      self.setupConnectedDevices()
      self.setupSmartData()
      self.setupNvmeData()

   @Tac.handler( "connectedDevices" )
   def setupConnectedDevices( self ):
      """(Re)create the reactor watching connectedDevices."""
      connectedDevices = self.storageStatus.connectedDevices
      if connectedDevices is None:
         self.connectedDeviceReactor = None
         return
      self.connectedDeviceReactor = (
         ConnectedDevicesReactor( connectedDevices, self.fn ) )

   @Tac.handler( "nvmeData" )
   def setupNvmeData( self ):
      """(Re)create the per-device reactors over nvmeData.deviceData."""
      nvmeData = self.storageStatus.nvmeData
      if nvmeData is None:
         self.nvmeDataReactor = None
         return
      self.nvmeDataReactor = Tac.collectionChangeReactor(
         nvmeData.deviceData,
         DeviceNvmeDataReactor,
         reactorArgs=( self.fn, ) )

   @Tac.handler( "smartData" )
   def setupSmartData( self ):
      """(Re)create the per-device reactors over smartData.deviceData."""
      smartData = self.storageStatus.smartData
      if smartData is None:
         self.smartDataReactor = None
         return
      self.smartDataReactor = Tac.collectionChangeReactor(
         smartData.deviceData,
         DeviceSmartDataReactor,
         reactorArgs=( self.fn, ) )

class StorageLifetimeReactor:
   """
   Reacts to various sysdb events that would trigger a change in the storage lifetime
   of the drive storing log archives

   The last lifetime value seen is kept in self.storageLifetime (initially 100)
   and is pushed to the archive config through the action queue whenever it
   changes.
   """
   def __init__( self, storageStatus, archiveConfig, actionQueue: ActionQueue ):
      self.actionQueue = actionQueue
      self.storageStatus = storageStatus
      self.archiveConfig = archiveConfig
      self.storageLifetime = 100
      # Kept as an attribute so that the reactors stay referenced.
      self.subReactors = (
         StorageStatusReactor( storageStatus, self.handleStorageStatus ),
         ArchiveStorageDestReactor( archiveConfig, self.handleStorageStatus )
      )
      # Set the storage status since it is not set by archive-init
      self.handleStorageStatus()

   def handleStorageStatus( self ):
      """This handler runs when any of the sub reactors fires"""
      status = self.storageStatus
      # Check to see if StorageDevices::Status has been initialized yet
      if ( status.connectedDevices is None or
           status.smartData is None or
           status.nvmeData is None ):
         return

      driveName = self.archiveDriveName()
      if driveName is None:
         return
      busType = status.connectedDevices.disk[ driveName ].bus

      # Any bus type other than 'ata' and 'nvme' is reported as 100.
      newStorageLifetime = 100
      if busType == 'ata':
         drive = status.smartData.deviceData.get( driveName )
         # drive can be none since smartData.deviceData can technically differ
         # from connectedDevices for a short time
         if drive is None:
            return
         newStorageLifetime = self.getAtaUsage( drive )
      elif busType == 'nvme':
         drive = status.nvmeData.deviceData.get( driveName )
         # Same as above since nvmeData.deviceData can differ as well
         if drive is None:
            return
         newStorageLifetime = drive.lifetimeRemaining.value

      if newStorageLifetime == self.storageLifetime:
         t0( "Did not detect change in drive wear" )
         return

      # Have to do logging in SuperServer since the Archive Lib does not provide
      # a logging interface. Also make sure we don't generate many extra log msgs
      crossedCutoff = (
         self.storageLifetime >= ArchiveLib.DRIVE_LIFETIME_CUTOFF and
         newStorageLifetime <= ArchiveLib.DRIVE_LIFETIME_CUTOFF )
      if crossedCutoff and not self.archiveConfig.shutdown:
         Logging.log( EOS_ARCHIVE_DISABLED_STORAGE_LIFETIME )

      self.storageLifetime = newStorageLifetime
      self.actionQueue.enqueue( self.doStorageLifetime )

   def doStorageLifetime( self, archive: ArchiveLib.Archive ):
      """
      Queued action: write the tracked lifetime into the archive config. The
      value is read when the action runs, not when it was enqueued.
      """
      t0( f"Attempting to set storageLifetime to {self.storageLifetime}" )
      archive.updateConfig( storageLifetime=self.storageLifetime )

   def archiveDriveName( self ):
      """
      Return the key in connectedDevices.disk of the drive whose mountPoint is
      the archive destination, or None when it can not be determined.

      The destination comes from archiveConfig.dest, falling back to the
      rootDirPath of the archive described by the config file when dest is
      unset.
      """
      currentDest: str = self.archiveConfig.dest
      if not currentDest:
         # Fallback to config file
         archive = None
         try:
            # We do not call actionQueue.currentArchive() to avoid the associated log
            # messages since this code will run even if LogArchiver is disabled.
            archive = ArchiveLib.Archive.currentArchive()
         except AssertionError as e:
            t0( f"Got assertion error while getting current archive {e}" )
         if archive is None or archive is False:
            # Without an archive we give up
            return None
         candidates = [ archive.rootDirPath ]
      else:
         candidates = [ currentDest ]
         # LogArchiverConfigReactor.handleArchiveDest parses dest as
         # "<name>:<path>", while the fallback above matches a bare path, so
         # the path component is accepted as well.
         # NOTE(review): assumes mountPoint holds a filesystem path - confirm
         # against StorageDevices::ConnectedDevices.
         _, separator, destPath = currentDest.partition( ':' )
         if separator and destPath:
            candidates.append( destPath )

      disks = self.storageStatus.connectedDevices.disk
      for driveName in disks:
         if disks[ driveName ].mountPoint in candidates:
            return driveName
      return None

   def getAtaUsage( self, ataDrive ):
      """Get lifetime of an ata drive. If it can not be determined default to 100"""
      if not ataDrive.lifetimeRemaining.present:
         return 100

      return ataDrive.lifetimeRemaining.value.standard

class LogArchiverConfigReactor( Tac.Notifiee ):
   """
   Reactor to process the change in quota pct or the change in archive status.
   The handler will invoke archive code to handle these.

   The handlers only enqueue work on the action queue; the do* methods are the
   queued actions and receive the archive fetched at enqueue time.
   """

   notifierTypeName = 'Mgmt::Archive::Config'
   superServerArchiveLockTimeout = 30

   def __init__( self, archiveConfig, actionQueue: ActionQueue ):
      super().__init__( archiveConfig )
      self._config = archiveConfig
      self.actionQueue = actionQueue
      # Publish the lock timeout through the environment variable named by
      # ArchiveLib.
      os.environ[ ArchiveLib.Archive.archiveLockTimeoutEnvVar ] = (
         str( self.superServerArchiveLockTimeout ) )

   def doShutdown( self, archive ):
      """Queued action: disable or enable archive to match config.shutdown."""
      if self._config.shutdown:
         t0( 'disable archive' )
         archive.disable()
      else:
         t0( 'enable archive' )
         archive.enable()

   @Tac.handler( 'shutdown' )
   def handleArchiveShutdown( self ):
      """Handle update to management archive shutdown/noshutdown."""

      t0( 'archive config shutdown state changed to', self._config.shutdown )

      self.actionQueue.enqueue( self.doShutdown )

   def doQuotaPct( self, archive ):
      """
      Queued action: apply config.quotapct to archive. QuotaCmdException and
      AssertionError are reported through syslog instead of being raised.
      """
      try:
         t0( 'set archive quota pct to', self._config.quotapct )
         archive.setQuotaPct( self._config.quotapct )
         t0( 'done set archive quota pct to', self._config.quotapct )
      except SpaceMgmtLib.Quota.QuotaCmdException as e:
         t0( 'updating quota failed because of a QUOTACMD_ERROR' )
         Logging.log( EOS_ARCHIVE_QUOTACMD_ERROR, e )
      except AssertionError as e:
         t0( 'updating quota failed because of an FS_ERROR' )
         Logging.log( EOS_ARCHIVE_FS_ERROR, e )

   @Tac.handler( 'quotapct' )
   def handleArchiveQuotaPct( self ):
      """Handle update to management archive quotapct [0-100]."""

      t0( 'archive config quotapct changed to', self._config.quotapct )

      if CEosHelper.isCeos():
         t0( 'can not set quota percentage in ceos' )
         return

      self.actionQueue.enqueue( self.doQuotaPct )

   def _overrideArchiveConfigFile( self, destName, destPath, enabled ):
      """
      Remove the archive config file and write a fresh one for destName and
      destPath with the given enabled state, no quota pct and a storage
      lifetime of 100. Returns True on success, False when writing fails.
      """
      t0( 'overriding archive config file' )
      # Best effort removal: return code and output are ignored.
      Tac.run( [ 'rm', '-f', ArchiveLib.Archive.configFilePath ],
               asRoot=True,
               ignoreReturnCode=True,
               stdout=Tac.DISCARD,
               stderr=Tac.DISCARD )
      try:
         ArchiveLib.Archive.writeConfig( destName, destPath, None, enabled, 100 )
      except ( OSError, ValueError ) as e:
         t0( 'failed to write archive config file:', e )
         return False
      return True

   def doDestSetup( self, archive ):
      """
      Queued action run after a destination change: rotate any existing archive
      directory and, when the archive is enabled, set it up again. Failures are
      reported through syslog instead of being raised.
      """
      try:
         if archive.enabled:
            t0( 'rotate any existing archive directory and setup archive' )
            archive.setup( rotate=True, ignoreConfig=True )
         else:
            t0( 'rotate any existing archive directory' )
            archive.rotateArchiveDir()
      except ( SpaceMgmtLib.Quota.QuotaCmdException,
               Tac.SystemCommandError ) as e:
         Logging.log( EOS_ARCHIVE_QUOTACMD_ERROR, e )
      except ( OSError, AssertionError ) as e:
         Logging.log( EOS_ARCHIVE_FS_ERROR, e )

   @Tac.handler( 'dest' )
   def handleArchiveDest( self ):
      """
      Handle update to management archive destination.

      dest is either empty (destination cleared) or "<name>:<path>". The archive
      config file is updated accordingly; if that fails with OSError or
      ValueError the file is overridden with a fresh one. The destination setup
      is enqueued unless dest is malformed or the config file could not be
      written at all.
      """

      t0( "archive config destination changed to '%s'" % self._config.dest )

      if self._config.dest == '':
         destName, destPath = '', ''
      else:
         try:
            destName, destPath = self._config.dest.split( ':' )
         except ValueError:
            t0( 'invalid archive config dest format:', self._config.dest )
            return

      # Clearing the destination goes through the same error handling as
      # setting one, so that a config file which can not be updated does not
      # make this handler raise.
      try:
         t0( 'update archive config file' )
         ArchiveLib.Archive.updateConfig( name=destName,
                                          path=destPath,
                                          quotaPct=None )
      except ( OSError, ValueError ) as e:
         t0( 'failed to update archive config file:', e )
         if not self._overrideArchiveConfigFile( destName,
                                                 destPath,
                                                 not self._config.shutdown ):
            return

      self.actionQueue.enqueue( self.doDestSetup )

   def close( self ):
      """Handle management archive close."""

      Tac.Notifiee.close( self )

class LogArchiver( SuperServer.SuperServerAgent ):
   """
   SuperServer agent that mounts the archive config and the storage device
   status, and creates the reactors once it is active.
   """

   def __init__( self, entityManager ):
      super().__init__( entityManager )
      mountGroup = entityManager.mountGroup()

      # SSD File Archive config and status object
      self.archiveConfig = mountGroup.mount(
         'mgmt/archive/config', 'Mgmt::Archive::Config', 'r' )
      storageStatusPath = Cell.path( 'hardware/storageDevices/status' )
      self.storageDeviceStatus = mountGroup.mount(
         storageStatusPath, 'StorageDevices::Status', 'r' )

      # Reactors are only created in onSwitchover()
      self.configReactor = None
      self.actionQueue = ActionQueue()
      self.storageLifetimeReactor = None

      def _mountsDone():
         # Callback given to the mount group close; nothing to do here when
         # the agent is not active.
         if not self.active():
            return
         self.onSwitchover( None )

      mountGroup.close( _mountsDone )

   def onSwitchover( self, protocol ):
      """
      Create the reactors. protocol is not used. The storage lifetime reactor
      is only created when its feature toggle is enabled.
      """
      config = self.archiveConfig
      queue = self.actionQueue

      self.configReactor = LogArchiverConfigReactor( config, queue )
      if toggleDisableLowStorageLifetimeEnabled():
         self.storageLifetimeReactor = StorageLifetimeReactor(
            self.storageDeviceStatus, config, queue )

      self.warm_ = True

   def warm( self ):
      """Always reports True."""
      return True

def Plugin( ctx ):
   """Plugin entry point: build the LogArchiver agent and register it on ctx."""
   logArchiver = LogArchiver( ctx.entityManager )
   ctx.registerService( logArchiver )
