# Copyright (c) 2014 Arista Networks, Inc.  All rights reserved.
# Arista Networks, Inc. Confidential and Proprietary.

# pylint: disable=singleton-comparison

"""
Module implements MlagPlugin for syncing ARP entries during reload-delay for
graceful re-entry and faster convergence in newly formed peer.
1. When reload-delay is true, MLAG primary requests ARP agent to log ARP entries
   to a file.
2. On completion of file logging, we react to status and spawn a client to
   transfer the file using a reserved port on local-interface
3. MLAG secondary starts the server and requests ARP restore when server exits (
   with a success status ).

FileTransfer is done via MlagFileTransfer binary which is spawned from the plugin.
"""

from __future__ import absolute_import, division, print_function
from GenericReactor import GenericReactor
import Tracing
import Logging
import Tac
import os, errno
import MlagShared
import MlagMountHelper
import QuickTrace
from ArpFileCommonLib import ARP_FILE_INPUT_CONFIG, ARP_FILE_STATUS
from MlagShared import ProcessManager

# Trace helpers used throughout this file
t2 = Tracing.trace2 # Events in Plugin method
t3 = Tracing.trace3 # Unusual events/error scenario
t4 = Tracing.trace4 # Events in FileTransfer manager

# QuickTrace counterparts; non-literal values are wrapped in qv() by callers
qv = QuickTrace.Var
qt3 = QuickTrace.trace3
qt4 = QuickTrace.trace4

# Constants used in the plugin which need to be tuned for testing. The
# environment overrides are read once, when this module is imported.
MLAG_ARP_FILEPATH = '/tmp/mlag/'
MLAG_ARP_SYNC_PORT = int( os.environ.get( "MLAG_ARP_SYNC_PORT", 
                          str( MlagShared.MLAG_ARP_SYNC_PORT ) ) )
# The delta timeout is needed for ports to come up for arp refresh to egress.
# It is added to the reload-delay value in getRefreshInterval().
ARP_REFRESH_DELTA = int( os.environ.get( "ARP_REFRESH_DELTA", '120' ) )
# Retry client in primary for the race where it connects before server listens.
# RETRY_TIMEOUT and MAX_RETRY are handed to the ProcessManager that runs the
# MlagFileTransfer client.
RETRY_TIMEOUT = int( os.environ.get( "RETRY_TIMEOUT", '15' ) )
MAX_RETRY = 12

# Global variable to avoid losing reference to reactors
arpFileTransferManager = None

# Syslog handles. Both formats take two arguments: the local MLAG state
# ('primary' or 'secondary') and the peer address.
# NOTE(review): recommendedAction is the quoted text "Logging.NO_ACTION_REQUIRED"
# rather than an attribute of the Logging module - presumably the constant was
# intended; confirm against the Logging API before changing it.
MLAG_ARP_SYNC_COMPLETE = Logging.LogHandle(
              "MLAG_ARP_SYNC_COMPLETE",
              severity=Logging.logInfo,
              fmt="MLAG %s ARP table is synchronized with peer %s",
              explanation="ARP entries were successfully synchronized between MLAG "
              "peers during the reload delay",
              recommendedAction="Logging.NO_ACTION_REQUIRED" )
# Logged by logArpSyncFail() when a helper process exits with a failure status.
MLAG_ARP_SYNC_FAILURE = Logging.LogHandle(
              "MLAG_ARP_SYNC_FAILURE",
              severity=Logging.logWarning,
              fmt="MLAG %s ARP table synchronization failed with peer %s",
              explanation="ARP entries failed to synchronize between MLAG peers "
              "during the reload delay",
              recommendedAction="Logging.NO_ACTION_REQUIRED" )

def mkdirMlagArpFilePath():
   """
   Ensure the directory hosting the files synced between MLAG peers exists.

   Returns True when MLAG_ARP_FILEPATH was created or already is a directory,
   and False when creating it failed for any other reason.
   """
   try:
      os.makedirs( MLAG_ARP_FILEPATH )
   except OSError as err:
      # An existing path is only acceptable if it really is a directory
      benign = err.errno == errno.EEXIST and os.path.isdir( MLAG_ARP_FILEPATH )
      if not benign:
         t3( "Unexpected error", err.errno )
         qt3( "Unexpected error", qv( err.errno ) )
         return False
      t3( "Directory already exists", MLAG_ARP_FILEPATH )
      qt3( "Directory already exists", qv( MLAG_ARP_FILEPATH ) )
   else:
      t2( "Created mlag directory", MLAG_ARP_FILEPATH )
   return True

class ArpFileTransferManager():
   """
   Provides file transfer functionality using MlagFileTransfer binary.
   It reacts to mlagStatus::mlagState and protoStatus::portsErrdisabled state.
   1. If we are primary and in reload-delay, request ARP agent to fileLog arp
      entries.
   2. If we are secondary and in reload-delay, request ARP agent to restore ARP
      entries from synced file.
   3. Invoke ProcessManager object for MlagFileTransfer binary
   """
   def __init__( self, mlagConfig, mlagStatus, mlagHwStatus, protoStatus,
                 arpFileConfig, arpFileStatus, redundancyStatus ):
      """
      Store the mounted entities and install the reactors that drive the sync.

      mlagConfig, mlagHwStatus: read for the reload-delay value; mlagConfig
         also supplies the peer address.
      mlagStatus, protoStatus, redundancyStatus: watched by the reactors
         below; every notification is routed to handleArpSyncRestore.
      arpFileConfig, arpFileStatus: where file log/restore requests are
         written and where their completion counts are read.
      """
      self.mlagConfig = mlagConfig
      self.mlagStatus = mlagStatus
      self.mlagHwStatus = mlagHwStatus
      self.protoStatus = protoStatus
      self.arpFileConfig = arpFileConfig
      self.arpFileStatus = arpFileStatus
      self.redundancyStatus = redundancyStatus
      # Most recent helper process (grep, transfer client or transfer server)
      self.procMgr = None
      # Define the reactors
      # arpFileStatusReactor is created on demand by clientArpSync
      self.arpFileStatusReactor = None
      self.mlagStateReactor = GenericReactor( self.mlagStatus, 
                                              [ 'mlagState', 'failover' ],
                                              self.handleArpSyncRestore )
      self.portsErrdisabledReactor = GenericReactor( self.protoStatus,
                                                     [ 'portsErrdisabled' ],
                                                     self.handleArpSyncRestore )
      # NOTE(review): callBackNow=True presumably invokes the handler right
      # away, so this reactor must stay last, after every attribute the
      # handler reads has been set - confirm against GenericReactor.
      self.redundancyModeReactor = GenericReactor( self.redundancyStatus,
                                                   [ 'mode' ],
                                                   self.handleArpSyncRestore,
                                                   callBackNow=True )

   def cleanupReactors( self ):
      """Drop every reactor reference held by this manager."""
      for reactorAttr in ( 'arpFileStatusReactor', 'mlagStateReactor',
                           'portsErrdisabledReactor', 'redundancyModeReactor' ):
         setattr( self, reactorAttr, None )

   def cleanup( self ):
      """
      Clean up the current helper process, if any, and drop the arpFileStatus
      reactor. The other reactors are left installed.
      """
      for trace in ( t4, qt4 ):
         trace( "arpSync cleanup called" )
      activeProc = self.procMgr
      if activeProc:
         activeProc.cleanup()
      # clientArpSync re-creates this reactor on the next sync attempt
      self.arpFileStatusReactor = None

   def handleArpSyncRestore( self, notifiee=None ):
      """
      Common handler for all state reactors: decide whether an ARP sync should
      run and, if so, start the client (primary) or server (secondary) side.

      notifiee: unused.
      """
      mlagStatus = self.mlagStatus
      t4( "mlagState", mlagStatus.mlagState,
          "portsErrdisabled", self.protoStatus.portsErrdisabled,
          "failover", mlagStatus.failover,
          "redundancyMode", self.redundancyStatus.mode,
          "namespace", mlagStatus.localIntfNamespace )
      qt4( "mlagState", qv( mlagStatus.mlagState ),
           "portsErrdisabled", qv( self.protoStatus.portsErrdisabled ),
           "failover", qv( mlagStatus.failover ),
           "redundancyMode", qv( self.redundancyStatus.mode ),
           "namespace", qv( mlagStatus.localIntfNamespace ) )

      # If we failover from secondary while reload-delay is active, we should
      # not request arp logging: the transfer is valid only when failover is
      # False, ports are errdisabled, we are primary/secondary and active.
      skipTransfer = ( mlagStatus.failover
                       or self.protoStatus.portsErrdisabled == False
                       or mlagStatus.mlagState not in ( 'primary', 'secondary' )
                       or self.redundancyStatus.mode != 'active' )
      if skipTransfer:
         t4( "Skip file transfer and cleanup" )
         self.cleanup()
         return

      # Primary ships its ARP table, secondary receives and restores it
      if mlagStatus.mlagState == 'primary':
         self.clientArpSync()
         return
      if mlagStatus.mlagState == 'secondary':
         self.serverArpRestore()
         return

      t3( "Unexpected state. Let's assert as we should never reach here" )
      qt3( "Unexpected state. Let's assert as we should never reach here" )
      assert False, "Mlag state is neither primary/secondary"

   def getRefreshInterval( self ):
      """
      Return the refresh interval for restored ARP entries: the reload-delay
      in effect plus ARP_REFRESH_DELTA.

      The configured delay is used when the reload-delay type is
      "reloadDelayConfigured"; otherwise the hardware status value is used.
      """
      reloadDelayConfig = self.mlagConfig.reloadDelay
      if reloadDelayConfig.reloadDelayType == "reloadDelayConfigured":
         return reloadDelayConfig.delay + ARP_REFRESH_DELTA
      return self.mlagHwStatus.reloadDelay + ARP_REFRESH_DELTA

   def requestArpLog( self ):
      """
      Request a 'fileLog' operation on the MLAG ARP log file by creating its
      arpFileConfig entry and bumping the entry's opCount.

      Does nothing when the log file path is unavailable.
      """
      # Resolve the path once: every call to mlagArpLogFile() re-checks (and
      # may re-create) the directory, and could return None on a later call.
      mlagArpLogFile = self.mlagArpLogFile()
      if not mlagArpLogFile:
         return
      t4( "requestArpLog called for", mlagArpLogFile )
      self.arpFileConfig.newFileEntry( mlagArpLogFile, 'fileLog', 0 )
      self.arpFileConfig.fileEntry[ mlagArpLogFile ].opCount += 1

   def requestArpRestore( self ):
      """
      Request a 'fileRestore' operation on the synced ARP file by creating its
      arpFileConfig entry (with the current refresh interval) and bumping the
      entry's opCount.

      An existing entry whose refresh interval differs is deleted first so the
      new entry is created with the current value.
      """
      restoreFile = self.mlagArpRestoreFile()
      if not restoreFile:
         qt3( "File doesn't exist to populate arp table" )
         return
      t4( "requestArpRestore for", restoreFile )
      qt4( "requestArpRestore for", qv( restoreFile ) )

      refreshInterval = self.getRefreshInterval()
      hasStaleEntry = (
         restoreFile in self.arpFileConfig.fileEntry and
         self.arpFileConfig.fileEntry[ restoreFile ].refreshInterval !=
            refreshInterval )
      if hasStaleEntry:
         t3( "Request for restore already exists with different ctor args" )
         qt3( "Request for restore already exists with different ctor args" )
         del self.arpFileConfig.fileEntry[ restoreFile ]

      self.arpFileConfig.newFileEntry( restoreFile, 'fileRestore',
                                       refreshInterval )
      self.arpFileConfig.fileEntry[ restoreFile ].opCount += 1

   def clientArpSync( self ):
      """
      Primary-side entry point: watch arpFileStatus opCount and request the ARP
      agent to log its entries to file. handleOpCount continues the sync once
      the status opCount is updated.
      """
      t4( "clientArpSync called." )
      qt4( "clientArpSync called." )
      # The reactor is installed before the request is made - presumably so
      # that the completion notification cannot be missed.
      self.arpFileStatusReactor = GenericReactor( self.arpFileStatus, 
                                                  [ 'opCount' ],
                                                  self.handleOpCount )
      self.requestArpLog()

   def logArpSyncFail( self, returnCode ):
      """
      Emit the MLAG_ARP_SYNC_FAILURE syslog and describe the failure.

      returnCode: exit status of the helper process; it is passed to
         os.strerror() only to build a human-readable message.
      Returns the message string so callers can include it in their traces.
      """
      try:
         errorMsg = os.strerror( returnCode )
      except ( ValueError, OverflowError ):
         # ValueError: the platform has no text for this number.
         # OverflowError: the number does not fit in a C int.
         errorMsg = "Unknown Error"
      Logging.log( MLAG_ARP_SYNC_FAILURE,
                   self.mlagStatus.mlagState,
                   self.mlagConfig.peerAddress )
      return errorMsg

   def handleOpCount( self, notifiee=None, key=None ):
      """
      Client side (primary) handler for arpFileStatus opCount notifications.

      When the status opCount of the ARP log file matches the opCount in
      arpFileConfig, the log file is filtered through grep into the grep file.
      If grep matched anything, that file is shipped to the peer with the
      MlagFileTransfer client.

      notifiee: unused.
      key: opCount key (a file name) the notification is for.
      """
      t4( "arpFileStatus opCount notification for", key )
      if self.mlagStatus.mlagState != 'primary':
         t3( "Invalid attempt to sync ARP entries as mlag state",
             self.mlagStatus.mlagState )
         return

      if key != self.mlagArpLogFile():
         t4( "Ignored opCount update for", key )
         return

      configOpCount = self.fileConfigOpCount( self.mlagArpLogFile() )
      statusOpCount = self.fileStatusOpCount( self.mlagArpLogFile() )
      t4( "arpFileConfig opCount", configOpCount,
          "arpFileStatus opCount", statusOpCount )
      if configOpCount != statusOpCount:
         return
      # Resolve the path once so that open() below can never be handed None
      grepFile = self.mlagArpGrepFile()
      if not grepFile:
         return

      def handleClientExit( returnCode ):
         # Exit callback of the MlagFileTransfer client
         t4( "Client returned", returnCode )
         if returnCode == 0:
            Logging.log( MLAG_ARP_SYNC_COMPLETE,
                         'primary',
                         self.mlagConfig.peerAddress )
            t4( "Succesfully transferred the file" )
            qt4( "Succesfully transferred the file" )
         else:
            errorMsg = self.logArpSyncFail( returnCode )
            t3( "Unexpected exit from client because", returnCode, errorMsg )
            qt3( "Unexpected exit from client because", qv( returnCode ),
                 qv( errorMsg ) )

      # grep writes straight into this file; we only hold it open for the
      # lifetime of the grep process.
      # pylint: disable-next=consider-using-with
      grepStdout = open( grepFile, "w" )

      def handleGrepExit( returnCode ):
         # Exit callback of grep. grep is done writing, so release our handle;
         # closing an already closed file is a no-op.
         # NOTE(review): assumes ProcessManager does not reuse stdout after
         # invoking the callback - confirm.
         grepStdout.close()
         t4( "Grep returned", returnCode )
         mlagArpLogFile = self.mlagArpLogFile()
         # if MLAG_PROC_RETURN_ERROR is YES, it will invoke handleClientExit
         # in order to fake a failed ARP sync
         if returnCode == 0:
            # Ready to ship the file which has SVI ARP entries
            t4( "Attempting to ship the file", self.mlagArpGrepFile() )
            self.procMgr = ProcessManager( self.clientArgs(),
                                           maxRetry=MAX_RETRY,
                                           retryTimeout=RETRY_TIMEOUT,
                                           callback=handleClientExit )
            self.procMgr.run()
         elif returnCode == 1:
            # grep exits with 1 when no line matched
            t3( "No SVI ARP entries were found in", mlagArpLogFile )
            qt3( "No SVI ARP entries were found in", qv( mlagArpLogFile ) )
         else:
            errorMsg = self.logArpSyncFail( returnCode )
            t3( "We couldn't filter", mlagArpLogFile, "for SVIs because ",
                returnCode, errorMsg )
            qt3( "We couldn't filter", qv( mlagArpLogFile ),
                 "for SVIs because ", qv( returnCode ), qv( errorMsg ) )

      try:
         self.procMgr = ProcessManager( self.grepArgs(), callback=handleGrepExit,
                                        stdout=grepStdout )
         self.procMgr.run()
      except Exception:
         # Spawning failed, so the exit callback will never release the file
         grepStdout.close()
         raise

   def serverArpRestore( self ):
      """
      Secondary-side entry point: run the MlagFileTransfer server and, when it
      exits successfully, request the ARP restore from the received file.
      """
      t4( "serverArpRestore called" )
      qt4( "serverArpRestore called" )

      def handleServerExit( returnCode ):
         # Exit callback of the MlagFileTransfer server
         if returnCode != 0:
            errorMsg = self.logArpSyncFail( returnCode )
            t3( "Unexpected server exit because:", errorMsg )
            qt3( "Unexpected server exit because", qv( errorMsg ) )
            return
         # The client delivered the file successfully
         Logging.log( MLAG_ARP_SYNC_COMPLETE,
                      'secondary',
                      self.mlagConfig.peerAddress )
         self.requestArpRestore()

      serverProc = ProcessManager( self.serverArgs(), callback=handleServerExit )
      self.procMgr = serverProc
      serverProc.run()

   def clientArgs( self ):
      """
      Build the MlagFileTransfer command line for the client side, which sends
      the grep file to the peer address on MLAG_ARP_SYNC_PORT.
      """
      mlagStatus = self.mlagStatus
      args = [ 'MlagFileTransfer', '--client' ]
      args += [ '-ip', str( self.mlagConfig.peerAddress ) ]
      args += [ '--port', str( MLAG_ARP_SYNC_PORT ) ]
      args += [ '-dev', mlagStatus.localInterface.deviceName ]
      args += [ '-f', self.mlagArpGrepFile() ]
      args += [ '-n', mlagStatus.localIntfNamespace ]
      return args

   def serverArgs( self ):
      """
      Build the MlagFileTransfer command line for the server side, which
      receives into the restore file on MLAG_ARP_SYNC_PORT.

      The '-ip' value is the wildcard address of the peer address' family.
      """
      ipv6Family = Tac.Type( "Arnet::AddressFamily" ).ipv6
      isIpv6Peer = self.mlagConfig.peerAddress.af == ipv6Family
      wildcardAddr = '::' if isIpv6Peer else '0.0.0.0'
      mlagStatus = self.mlagStatus
      args = [ 'MlagFileTransfer', '--server', '-ip', wildcardAddr ]
      args += [ '--port', str( MLAG_ARP_SYNC_PORT ) ]
      args += [ '-dev', mlagStatus.localInterface.deviceName ]
      args += [ '-f', self.mlagArpRestoreFile() ]
      args += [ '-n', mlagStatus.localIntfNamespace ]
      return args

   def grepArgs( self ):
      """
      Build the grep command line that filters the ARP log file, keeping the
      lines that contain '#', 'Vlan' or 'Version'.
      """
      # This filters all SVIs. But there can be some SVI on VLAN which is not
      # part of peer-link.
      # XXX-Clarify: Should we filter for Vlans active on peer-link?
      # Raw string: "\|" is not a valid Python escape sequence, and relying on
      # it being passed through unchanged triggers an invalid-escape warning.
      return [ 'grep', r'#\|Vlan\|Version', self.mlagArpLogFile() ]

   # pylint: disable-next=inconsistent-return-statements
   def fileConfigOpCount( self, fileName ):
      fileEntry = self.arpFileConfig.fileEntry.get( fileName )
      if fileEntry:
         return fileEntry.opCount

   def fileStatusOpCount( self, fileName ):
      """
      Return the opCount recorded for fileName in arpFileStatus, i.e. whatever
      the opCount collection's get() yields for that key.
      """
      statusOpCounts = self.arpFileStatus.opCount
      return statusOpCounts.get( fileName )
   
   # All the files are hosted under /tmp/mlag. If the directory gets removed,
   # we should avoid side-effects of running into OS/IO exception
   def mlagArpFilePathExist( self ):
      """
      Return True when MLAG_ARP_FILEPATH is a directory. When it is not, try to
      create it and return the result of that attempt.
      """
      if os.path.isdir( MLAG_ARP_FILEPATH ):
         return True
      t3( "Mlag arp file path doesn't exist", MLAG_ARP_FILEPATH )
      qt3( "Mlag arp file path doesn't exist", qv( MLAG_ARP_FILEPATH ) )
      return mkdirMlagArpFilePath()

   def mlagArpLogFile( self ):
      """
      Return the path of the file the ARP agent logs its entries to
      (overridable through the MLAG_ARP_LOGFILE environment variable), or None
      when the hosting directory is unavailable.
      """
      if not self.mlagArpFilePathExist():
         return None
      return os.environ.get( "MLAG_ARP_LOGFILE", '/tmp/mlag/arpLog.txt' )
   
   def mlagArpRestoreFile( self ):
      """
      Return the path of the file ARP entries are restored from (overridable
      through the MLAG_ARP_RESTOREFILE environment variable), or None when the
      hosting directory is unavailable.
      """
      if not self.mlagArpFilePathExist():
         return None
      return os.environ.get( "MLAG_ARP_RESTOREFILE", '/tmp/mlag/arpRestore.txt' )

   def mlagArpGrepFile( self ):
      """
      Return the path of the file holding the grep-filtered ARP log
      (overridable through the MLAG_ARP_GREPFILE environment variable), or None
      when the hosting directory is unavailable.
      """
      if not self.mlagArpFilePathExist():
         return None
      return os.environ.get( "MLAG_ARP_GREPFILE", '/tmp/mlag/arpGrep.txt' )

def Plugin( ctx ):
   '''
   Plugin entry point: mount the entities needed for the ARP sync and, once
   the mounts are done, create the ArpFileTransferManager whose reactors react
   to mlagState and portsErrdisabled. We don't need a plugin callback; state
   notification is sufficient here.
   '''
   mountGroup = ctx.entityManager.mountGroup()

   # Sysdb paths used to communicate file log/restore requests to ARP
   arpFileConfig = mountGroup.mount( "arp/file/input/config/mlag",
                                     ARP_FILE_INPUT_CONFIG, "wc" )
   arpFileStatus = mountGroup.mount( "arp/file/status", ARP_FILE_STATUS, "r" )

   # Mlag config, status and protoStatus: read for reloadDelay and watched for
   # mlagState and portsErrdisabled changes. The helpers also mount the paths
   # that Mlag::Config and Mlag::Status depend on.
   mlagConfig = MlagMountHelper.mountMlagConfig( mountGroup )
   mlagStatus = MlagMountHelper.mountMlagStatus( mountGroup )
   mlagHwStatus = mountGroup.mount( "mlag/hardware/status",
                                    "Mlag::Hardware::Status", "r" )
   protoStatus = mountGroup.mount( "mlag/proto", "Mlag::ProtoStatus", "r" )

   # Create /tmp/mlag directory for hosting files which will be synced between peers
   mkdirMlagArpFilePath()

   def finishMounts():
      # Handed to mountGroup.close() below
      global arpFileTransferManager
      t2( "Mounts complete" )
      previousManager = arpFileTransferManager
      if previousManager:
         # Happens in a cohabiting test on mlag agent restart which keeps the
         # reference around because reactors hold the reference to notifier object
         t3( "We already have instantiated reactors. Cleaning it up" )
         qt3( "We already have instantiated reactors. Cleaning it up" )
         previousManager.cleanup()
         previousManager.cleanupReactors()
         arpFileTransferManager = None
      arpFileTransferManager = ArpFileTransferManager(
            mlagConfig, mlagStatus, mlagHwStatus, protoStatus,
            arpFileConfig, arpFileStatus, ctx.redundancyStatus )

   mountGroup.close( finishMounts )
   


