#!/usr/bin/env python3
# Copyright (c) 2006-2010 Arista Networks, Inc.  All rights reserved.
# Arista Networks, Inc. Confidential and Proprietary.

import Tac
import Tracing
import EntityManager
import Plugins
import Cell
import re
import sys
import os
import SysdbUtil
from MountProfileUtils import handleExtraPreinits
import time
from time import asctime
import socket

# Short aliases for the Tracing functions used throughout this file; the
# digit in each name matches the Tracing.traceN function it refers to.
t0 = Tracing.trace0
t1 = Tracing.trace1
t5 = Tracing.trace5
t6 = Tracing.trace6
t9 = Tracing.trace9

# The function below hooks into Tac::Entity::newEntity calls made from
# SysdbPlugins.  Its purpose is to detect SysdbPlugins creating mountable
# entities in Sysdb, which is not allowed.

def newEntityCheckNonMountableHook( entity, typeName, path ):
   """Reject a newEntity call that would create a mountable entity.

   Standby.disableEntityCreation() installs this function through
   Tac.newEntityHookIs().  A path is treated as mountable when 'entity' and
   every ancestor of it that itself has a parent are of exactly the same type
   as Tac.root, and 'entity' is at least two levels below the parentless top
   of the hierarchy.

   Parameters:

   * ``entity`` -- the entity under which the new entity is being created.
   * ``typeName`` -- type name of the entity being created (only checked for
     not being None).
   * ``path`` -- name of the new entity relative to 'entity'; used in the
     diagnostic message.

   Raises AssertionError when any argument is None or when the creation is
   not allowed.  Returns None otherwise.
   """
   assert entity is not None
   assert typeName is not None
   assert path is not None

   # For each newEntity being created, ensure that it is not a mount point.
   # The way to verify this is that at least one of the objects in the
   # parental hierarchy is not a Tac::Dir.  If they are all Tac::Dirs, then
   # this path becomes mountable.
   allDirs = True
   depth = 0   # number of parent links followed from 'entity'
   node = entity
   while node.parent:
      # Exact type comparison on purpose: anything whose type differs from
      # that of Tac.root is treated as "not a Tac::Dir".
      # pylint: disable-next=unidiomatic-typecheck
      if type( node ) != type( Tac.root ):
         allDirs = False
      node = node.parent
      depth += 1

   if allDirs and depth > 1:
      print( "*** newEntity NOT ALLOWED *** for mountable path: " +
             entity.fullName + '/' + path )
      # Raised explicitly rather than with 'assert False' so that the check
      # is still enforced when Python runs with -O (which strips asserts).
      raise AssertionError( "newEntity not allowed via SysdbPlugins" )

class Standby( EntityManager.Remote ):
   """Entity manager used by Sysdb when running on the standby supervisor.

   This entity manager obtains all requested entities by mounting them from
   Sysdb on the active supe.  If the active Sysdb crashes, then this Sysdb
   takes over.
   """

   def __init__( self, sysname,
                 waitForSysdbToInitialize=True,
                 dieOnDisconnect=False, sysdbhostport=None, sysdbsockname=None,
                 rootFlags=None, sysdbInitWaitTime=None,
                 plugins=None, pluginPath=None, noPlugins=False,
                 preInitProfiles=None, loadMountProfile=True, **kargs ):
      """Connect to the active Sysdb, mount its root and run the plugins.

      Parameters:

      * ``sysname`` -- name of the Tac::Dir created under Tac.root; the local
        'Sysdb' root directory is created inside it.
      * ``waitForSysdbToInitialize`` -- Do not return from the constructor
        until Sysdb has completed initializing, i.e. processed startup-config.
      * ``dieOnDisconnect`` -- Abort the process if the connection to Sysdb
        is disconnected, which happens if Sysdb closes the connection, which
        may happen if this process gets too far behind in handling messages,
        or if Sysdb restarts.  Note: do not set this to False unless you really
        know what you are doing.  (This class nevertheless defaults it to
        False.)
      * ``sysdbhostport`` -- a tuple (host, port number) on which Sysdb is
        listening for entity log connections.
      * ``sysdbsockname`` -- a unix domain socket name on which Sysdb is
        listening for entity log connections.  Takes precedence over
        ``sysdbhostport``; if neither is given,
        EntityManager.sysdbServerAddress() is used.
      * ``rootFlags`` -- passed through to EntityManager.Remote.
      * ``sysdbInitWaitTime`` -- accepted but not used by this constructor.
      * ``plugins``, ``pluginPath`` -- passed to Plugins.loadPlugins() for the
        SysdbPlugins.  LauncherPlugins are only loaded when both are None.
      * ``noPlugins`` -- when True, skip preinit entity creation, configRoot
        population and all plugin loading.
      * ``preInitProfiles`` -- passed to handleExtraPreinits().
      * ``loadMountProfile`` -- passed through to EntityManager.Remote.
      * ``kargs`` -- further keyword arguments for EntityManager.Remote.

      Raises KeyboardInterrupt when ``waitForSysdbToInitialize`` is set and
      doMountSysdbStatus() returns a false value.
      """
      t0( 'StandbySysdb: __init__' )
      self._entityCreationAllowed = True
      sysnameDir = Tac.root.newEntity( 'Tac::Dir', sysname )
      root = sysnameDir.newEntity( 'Tac::Dir', 'Sysdb' )
      serveraddr = sysdbsockname or sysdbhostport or \
                   EntityManager.sysdbServerAddress()
      # State used by mountFailed() and _handleActiveDied().  It is set up
      # before the base class constructor is given mountFailed as its failure
      # callback, so the attributes exist whenever that callback runs.
      self.connectionFailed_ = False
      self.waitingToSwitchover_ = False
      self.failureSwitchoverTimer_ = None

      # Set environment variables if needed, otherwise make sure they're unset
      handleExtraPreinits( preInitProfiles )

      EntityManager.Remote.__init__( self, sysname, serveraddr,
                       root=root, remoteRootPath=root.fullName,
                       dieOnDisconnect=dieOnDisconnect, mountRoot=True,
                       connectionTimeout=SysdbUtil.standbyConnectionTimeout,
                       rootFlags=rootFlags,
                       sysdbServer=True,
                       failCallback=self.mountFailed,
                       # Use TCP KeepAlive as a backup.  Normally we should
                       # be notified by the kernel sending us a FIN, or (if the
                       # kernel crashes) a loss of scd election mgr heartbeat.
                       # So, for example, if things are idle, kernel crashed
                       # but came up without us noticing the missing
                       # heartbeat, then this mechanism will kick in.
                       # Because it is just a backup, we set KEEPIDLE to
                       # 30 seconds.  It is to get us unwedged, not to respond
                       # quickly. Additionally we want to increase the
                       # socket queue depth between standby and active to 60MB
                       # similar to the active-standby direction.
                       entityLogSocketOptions=[ ( socket.SOL_SOCKET,
                                                  socket.SO_KEEPALIVE, 1 ),
                            ( socket.IPPROTO_TCP, socket.TCP_KEEPCNT, 3 ),
                            ( socket.IPPROTO_TCP, socket.TCP_KEEPIDLE, 30 ),
                            ( socket.IPPROTO_TCP, socket.TCP_KEEPINTVL, 2 ),
                            ( socket.SOL_SOCKET, socket.SO_SNDBUFFORCE,
                               60 * 1024 * 1024 ) ],
                       verbose=True,
                       debug=False,
                       loadMountProfile=loadMountProfile,
                       **kargs
                       )
      t0( asctime(), 'mount complete' )

      Tac.waitFor( lambda: self.localStatus_,
                   description="cell to be created",
                   timeout=10 )
      # Let agents' entityManager know they are running on standby (so
      # they defer mounting anything writable)
      self.localStatus_.standby = True
      # pylint: disable-next=consider-using-f-string
      sysdbCellDir = 'cell/%d' % Cell.cellId()
      sysdbCellDirFullpath = self.cEm_.mountKey( sysdbCellDir )
      t9( "Setting writableMounts for path " + sysdbCellDirFullpath )
      self.localStatus_.writableMounts[ sysdbCellDirFullpath ] = True
      self.localStatus_.writableMounts[ self.cEm_.mountKey( 'cell' ) ] = True
      self.redundancyStatus_.mode = "standby"
      self.redundancyStatus_.protocol = "sso"

      activeCellId = Cell.activeCell()
      if activeCellId is None:
         # No active cell reported: fall back to our own cell id.
         activeCellId = Cell.cellId()
      self.redundancyStatus_.activeCellId = activeCellId

      if os.getenv( "A4_CHROOT" ):
         # Set initialized to True in workspace environment.
         # There are tests which set cellType to supervisor & dont start
         # ElectionMgr.  Setting initialized allows those tests to unblock.
         self.redundancyStatus_.initialized = True

      if waitForSysdbToInitialize:
         if not self.cEm_.doMountSysdbStatus( 0 ):
            raise KeyboardInterrupt

      Tac.flushEntityLog()
      t0( asctime(), 'registered cell specific dirs' )

      self.notifyWhenActive_ = []
      t6( "Standby init initializing configRoot" )
      self.configRoot_ = self.register( 'Sysdb/configRoot', 'Sysdb::ConfigRoot' )
      t1( "running plugins:", plugins )

      self.preInitPaths_ = []
      # If noPlugins is set to True, don't load any plugins.
      if not noPlugins:
         t0( 'Starting preinit entity creation/mounting' )
         preinitInfo = self.cEm_.mountFileInfoWrap.getPreinitPathInfo()
         pathInfos = preinitInfo.preinitPathData
         for info in pathInfos.values():
            # Skip createOnMount paths
            if info.attr != 'createOnMount':
               path = info.path
               # Resolve %cellId and %peerCellId
               cellIdTokenVals = {
                  '%cellId' : str( Cell.cellId() ),
                  '%peerCellId' : str( Cell.peerCellId() ),
               }
               for token, val in cellIdTokenVals.items():
                  path = re.sub( re.escape( token ), val, path,
                                 flags=re.IGNORECASE )
               # Below function may not be called from anywhere else.
               # The third argument is register()'s 'force' parameter.
               self._createEntity( path, info.type,
                                 ( info.attr == 'createOnStandby' ) )
         t0( 'Finished preinit entity creation/mounting' )
         self.cEm_.setDemuxMgrAutoDelPaths()

         # Populate configRoot
         t0( 'Populating configRoot' )
         self.populateConfigRoot()

         # Override Entity creation function calls to prevent Sysdb Entities
         # from being created from SysdbPlugins
         self.disableEntityCreation()
         try:
            Plugins.loadPlugins( 'SysdbPlugin', self, plugins, pluginPath )

            # Note the total hack - the plugins/pluginpath parameter is rarely
            # specified in our tests, but if it is, then we'll likely see
            # an error trying to load the LauncherPlugins since
            # the exact same modules do not necessarily exist / we'll just
            # reload the same plugins twice. Since this is just a temporary
            # change before extracting all behaviour out of SysdbPlugins, for
            # now we simply do not load any LauncherPlugins if plugins
            # is specified.
            if plugins is None and pluginPath is None:
               Plugins.loadPlugins( 'LauncherPlugin',
                                    EntityManager.SysdbContext( self ) )

            # Entity creation must still be disabled once the plugins ran.
            assert not self._entityCreationAllowed
            # Note that we do not need to load the DefaultConfigPlugins
            # here, as they will be run on the active and we'll get all the
            # state from there. (see BUG185300)
            Tac.flushEntityLog()
            t0( asctime(), 'SysdbPlugin loaded\n' )
         finally:
            # Restore the ability to create Entities, also when a plugin
            # raised: the newEntity hook is process-global and must not
            # outlive this section.
            self.enableEntityCreation()

      active = not ( self.redundancyStatus_.protocol == 'sso' and
                     self.redundancyStatus_.mode == 'standby' )
      # Override Entity creation function calls to prevent Sysdb Entities
      # from being created from the registered active callbacks
      self.disableEntityCreation()
      try:
         self.doActiveCallbacks( active )
         assert not self._entityCreationAllowed
      finally:
         # restore the ability to create Entities
         self.enableEntityCreation()

      t6( "Standby init roots complete" )
      self.configRoot_.rootsComplete = True
      # ACRs for Sysdb must have write permission even if they're issued from
      # Standby, otherwise we crash with a write to readonly proxy
      mg = self.mountGroup()
      mg.mount(
            Cell.path( 'agent/commandRequest/config/Sysdb' ), 'Tac::Dir', 'wifc' )
      mg.close( blocking=True )
      if not self.isLocalEm():
         # This is required because cell itself gets mounted as a "wc" on standby
         # (verified this by looking at logs). Most likely, that is because
         # doNestedMounts creates the "/ar/Sysdb/cell" path while creating
         # "/ar/Sysdb/cell/2" path.
         # Mounting here with an explicit 'i' causes this required path to be
         # mounted correctly
         peerStatusPath = Cell.path( 'agent/status', Cell.activeCell() )
         mg = self.mountGroup()
         mg.mount( peerStatusPath, 'Tac::Dir', 'ri' )
         mg.close( blocking=True )

   def enableEntityCreation( self ):
      """Allow entity creation again and remove the newEntity hook.

      Asserts that the immediate caller is a function named ``__init__``
      or that the A4_CHROOT environment variable is set.
      """
      # Function must either be called from the __init__ function above or
      # from a test file only
      # pylint: disable-msg=W0212
      assert ( sys._getframe( 1 ).f_code.co_name == "__init__" ) or \
            os.getenv( "A4_CHROOT" )
      self._entityCreationAllowed = True
      Tac.newEntityHookIs( None )

   def disableEntityCreation( self ):
      """Forbid entity creation: register() refuses to create entities in
      local mode and newEntityCheckNonMountableHook is installed as the
      newEntity hook."""
      self._entityCreationAllowed = False
      Tac.newEntityHookIs( newEntityCheckNonMountableHook )

   def mountFailed( self, path ):
      """Failure callback handed to EntityManager.Remote (failCallback).

      On the first call, arms a timer that fires _handleActiveDied() after a
      grace period; later calls return without doing anything.

      * ``path`` -- the path reported with the failure; only traced.
      """
      t5( "Detected failure on mount socket via path", path )
      if self.connectionFailed_:
         # Only really need to do this once, and no point in further delaying
         # the timer.
         return
      t5( "Starting timer to exit Sysdb, unless switchover starts soon." )
      # Kept False until the real expiry time is set below, so that a
      # spurious early call of the handler is ignored.
      self.waitingToSwitchover_ = False
      self.connectionFailed_ = True
      self.failureSwitchoverTimer_ = Tac.ClockNotifiee()
      self.failureSwitchoverTimer_.timeMin = Tac.endOfTime
      self.failureSwitchoverTimer_.handler = self._handleActiveDied
      # We should detect active losing heartbeat in 250 milliseconds.
      # Wait at least twice that long.  In fact, this is a fallback mechanism
      # to avoid getting permanently stuck in a bad state, under very
      # uncommon circumstances.  So wait 10 times as long [under autobuild
      # some breadth tests seem to take a very long time to detect switchover,
      # so we need some breathing space.] Now wait another 25 seconds (30
      # seconds total) so that if switchover takes a long time to start after
      # Sysdb dies we don't inadvertently die rather than switchover.
      #           If we wait 30 seconds, and haven't started switchover by
      # *then*, the best course is to exit and restart.  Either the other
      # Sysdb has recovered somehow and we need to connect to the new
      # instance, or else they crashed and we are hanging and not switching
      # over --- in either case we are in a broken state.
      #
      # NOTE: keep it 15 seconds as the "reload" case might need more time to
      # shut down the supervisor before killing Sysdb. At this point, we are
      # not doing switchover, so let the standby stay in the limbo state for a
      # little longer doesn't seem to be a real problem.
      # NOTE 2: BUG344851 It has been seen on Abuilds for this timeout to happen
      # with no apparent reason. We suspect the process got swapped out and the
      # switchover was not able to start within 15 seconds. Increasing timeout
      # to 120 seconds and see if the issue disappears.
      #
      # Net effect of the line below: 120 seconds when A4_CHROOT is set in
      # the environment, 15 seconds otherwise.
      gracePeriod = 15 if not os.getenv( "A4_CHROOT" ) else 120

      self.failureSwitchoverTimer_.timeMin = Tac.now() + gracePeriod
      self.waitingToSwitchover_ = True

   def _handleActiveDied( self ):
      """Timer handler armed by mountFailed().

      Exits the process (status 0) if the redundancy mode is neither
      'switchover' nor 'active'; otherwise disarms and drops the timer.
      """
      if self.waitingToSwitchover_:
         # Would be false if we were called spuriously before setting the
         # time.
         # pylint: disable-next=consider-using-in
         if( ( self.redundancyStatus_.mode != 'switchover' ) and
            ( self.redundancyStatus_.mode != 'active' ) ):
            # Uh oh! Active Sysdb is dead, but we haven't tried to switchover.
            # Let's restart
            msg = "Restarting standby Sysdb because active Sysdb died "
            msg += "and we did not switchover"
            t0( msg )
            print( asctime(), msg, file=sys.stderr )
            sys.exit( 0 )
         self.waitingToSwitchover_ = False
         self.failureSwitchoverTimer_.timeMin = Tac.endOfTime
         self.failureSwitchoverTimer_ = None

   def registerActiveCallback( self, func ):
      '''Register a one-argument function that will get called whenever
      doActiveCallbacks happens.'''
      self.notifyWhenActive_.append( func )

   def doActiveCallbacks( self, active ):
      '''Call all the registered 'handleActive' callbacks. It is important that
      these get called in the order that they were registered, in order to
      satisfy any dependencies that may have depended on the order of the
      plugins loading.

      * ``active`` -- the value passed to every callback.
      '''
      t0( "Calling registered 'handleActive' callbacks with active =", active )
      totalTime = 0.0
      for func in self.notifyWhenActive_:
         # XXX vwen: Maybe remove this after we have optimized the
         # handleActive callbacks
         # A monotonic clock is used so that wall-clock adjustments cannot
         # distort (or make negative) the measured duration.
         start = time.monotonic()
         func( active )
         elapsedTime = time.monotonic() - start
         # Callables such as functools.partial objects have no __name__;
         # fall back instead of failing while tracing.
         moduleName = getattr( func, '__module__', '?' )
         funcName = getattr( func, '__name__', repr( func ) )
         t1( "%s.%s takes %f seconds" % ( # pylint: disable=consider-using-f-string
            moduleName, funcName, elapsedTime ) )
         totalTime += elapsedTime
      # pylint: disable-next=consider-using-f-string
      t1( "doActiveCallbacks takes %f seconds" % totalTime )

   class LocalFinish():
      """Wraps an optional 'finish' callable so that the entity manager is
      first taken out of standby and the active callbacks are run."""

      def __init__( self, em, finish=None ):
         self.em = em
         self.finish = finish

      def handleLocalFinish( self ):
         """Clear the standby flag, run the active callbacks with True and
         then call the wrapped 'finish' callable, if any."""
         self.em.localStatus_.standby = False
         self.em.doActiveCallbacks( True )
         if self.finish:
            self.finish()

   # Only set the LocalFinish callback, do not change isLocalEm
   def setLocalFinish( self, finish=None ):
      """Install LocalFinish( self, finish ).handleLocalFinish through the
      base class' setLocalCompleteReactor()."""
      EntityManager.Remote.setLocalCompleteReactor( self,
         finish=self.LocalFinish( self, finish ).handleLocalFinish )

   # Convert to local EM and set the LocalFinish callback
   def convertToLocalEm( self, finish=None ):
      """Call the base class' convertToLocalEm() with
      LocalFinish( self, finish ).handleLocalFinish as its finish callback."""
      EntityManager.Remote.convertToLocalEm( self,
         finish=self.LocalFinish( self, finish ).handleLocalFinish )

   # Needs to behave like Local for Sysdb plugins:
   def register( self, path, typenameOrType, force=False ):
      """Used by Sysdb plugins to register an entity to be added to Sysdb.
      The typenameOrType parameter can either be a C++ type name, or a
      subclass of DynEntity.Type.

      In local mode the entity is created directly (mode "wi"); 'force' is
      ignored.  Otherwise the path and its ancestors are marked as writable
      mounts and the entity is registered with mode "wci".  An entity that
      already exists with the requested type name is returned as is when
      'force' is false.

      Raises AssertionError in local mode while entity creation is disabled.
      """
      if self.isLocalEm():
         # check if Entity creation is allowed
         if not self._entityCreationAllowed:
            # typenameOrType may be a type object rather than a string, so
            # convert it explicitly; plain concatenation would raise
            # TypeError and hide the real problem.
            print( "*** register NOT ALLOWED *** in SysdbPlugins. Path: " +
                   path + " Type: " + str( typenameOrType ) )
            # Raised explicitly rather than with 'assert False' so that the
            # check is still enforced when Python runs with -O.
            raise AssertionError( "register NOT ALLOWED" )
         # ignore force, irrelevant on Local but kept for consistency.
         return self.cEm_.doCreateEntity( path, typenameOrType, "wi" )

      t1( "register standby:", path )
      cleanedPath = EntityManager.cleanPath( path )
      fullpath = os.path.join( self.root_.parent.fullName,
                               self.root_.name,
                               cleanedPath )
      e = None
      try:
         e = Tac.entity( fullpath )
      except NameError:
         # Presumably raised when nothing exists at fullpath yet; the entity
         # is then treated as new.
         pass
      else:
         if (
              # If force, need to (re)mount writably unless the entity is
              # already mounted writably.
              ( ( not force ) or # pylint: disable=simplifiable-condition
                # XXX - temporarily disabled this check
                # self.localStatus_.writableMounts.get( cleanedPath ) ) and
                # XXX - and replace with
                False ) and
              # If the specified type is different, we need to remount.
              e.tacType.fullTypeName == typenameOrType ):
            t1( "registered existing" )
            return e
      exists = e is not None
      if force and exists:
         t1( "registered existing entity writably" )
      elif force and not exists:
         t1( "registered new entity writably" )
      elif exists:
         t1( "registered existing entity in new mode or type" )
      else:
         # we should check that we are in a writable subtree
         t1( "registered new entity" )
      mountKey = self.cEm_.mountKey( cleanedPath )
      t9( "Setting writableMounts for path " + mountKey )
      self.localStatus_.writableMounts[ mountKey ] = True
      # AgentMount infra also mount all ancestor paths
      # We can't check the useAgentMount flag since this is in Sysdb context
      assert cleanedPath != ''
      tokens = mountKey.split( '/' )
      # Mark every proper ancestor of the mount key: first the leading
      # component, then each longer prefix up to (not including) the key.
      tmpPath = tokens[ 0 ]
      t9( "Setting writableMounts for path " + tmpPath )
      self.localStatus_.writableMounts[ tmpPath ] = True
      for token in tokens[ 1 : -1 ]:
         tmpPath += '/' + token
         t9( "Setting writableMounts for path " + tmpPath )
         self.localStatus_.writableMounts[ tmpPath ] = True
      # Block - because non-SSO aware callers assume that the entity
      # exists immediately after the call to register.
      return self.cEm_.doRegister( path, typenameOrType, "wci" )

   def registerConfigMount( self, path, typenameOrType,
                            immediate=False, force=False ):
      """Like register(), used by Sysdb plugins to register an entity to be
      added to Sysdb. Additionally, indicates the entity is for configuration
      (previously done in CLI plug-ins with ConfigMount.mount()). The
      typenameOrType parameter can either be a C++ type name, or a
      subclass of DynEntity.Type.

      Changes the default sense of the force parameter, then invokes the
      base class implementation.  (Here 'force' defaults to False; the base
      class default is not visible in this file.)"""
      return EntityManager.Remote.registerConfigMount( self,
                                                       path,
                                                       typenameOrType,
                                                       immediate,
                                                       force )

   # Essentially synonym for register
   def mount( self, path, typeName, mode='r', mountGroup=None ):
      """Mount 'path'.  In local mode this is register( path, typeName ) and
      'mode' and 'mountGroup' are ignored; otherwise the call is forwarded
      to EntityManager.Remote.mount()."""
      if self.isLocalEm():
         return self.register( path, typeName )
      return EntityManager.Remote.mount( self, path, typeName, mode,
            mountGroup=mountGroup )

   # Alias used by __init__ for preinit entity creation.
   _createEntity = register

   # We don't alias entity to mount, because if we are not local then
   # we want to check that it exists, and matches type.
   def entity( self, path, typeName='', mode="r" ):
      """Return the entity at 'path': via lookup() in local mode (typeName
      and mode are then unused), otherwise via
      EntityManager._NotSimple.entity()."""
      if self.isLocalEm():
         return self.lookup( path )
      # pylint: disable-next=protected-access
      return EntityManager._NotSimple.entity( self, path, typeName, mode )
