#!/usr/bin/env python3
# Copyright (c) 2017 Arista Networks, Inc.  All rights reserved.
# Arista Networks, Inc. Confidential and Proprietary.


# pkgdeps: rpm dpdk
# pkgdeps: rpm pciutils

"""This module contains a plugin that manages Sfe-managed Ethernet phys."""

import json
import os
import re
import platform

import DesiredTracing
from EthIntf import MAX_SUPPORTED_MTU
import Fru
import Tac
import Tracing
from SfFruHelper import DEVICE, FIRST_MAC, MAC, PCI, DRIVER, ROLE, TYPE, VF2PF
import distutils.util
from VeosHypervisor import (
getPlatform,
platformCloud,
platformCloudOnly,
platformBareMetal,
platformCaravan,
platformHasNac,
platformOcteon10,
platformWillamette,
)
from CaravanFruHelper import CaravanCommon as CCommon

from Toggles.SfeSharedToggleLib import (
toggleSfeSharedNacEnabled,
toggleSfeNimSupportEnabled,
)

# Trace handle for this plugin; presumably the default handle picked up by
# the Tracing.traceN helpers aliased below -- TODO confirm.
__defaultTraceHandle__ = Tracing.Handle( "Fru.Sfe" )
DesiredTracing.desiredTracingIs( 'Fru.Sfe/01' )

# Short aliases for the trace levels used throughout this file.
traceDetail = Tracing.trace9
traceNormal = Tracing.trace8
traceAlert = Tracing.trace1
traceError = Tracing.trace0

# JSON file holding the device cache ( written by _updateDeviceCache, read by
# _readDeviceCache ).
deviceCacheFile = "/var/run/sfaFruPluginDevices.json"
# key=value file parsed by SfePhyDriver.getVeosConfig.
CLOUD_BESS_CONFIG = "/var/run/.veos-config"
# Directory prefix of the dpdk-devbind.py tool used by bindDeviceToDpdk.
dpdkDir = "/usr/share/dpdk/"

# Number of PF presence checks after which FruReadyReactor.handleTime gives up.
PF_PRESENCE_RETRY_LIMIT = 60

def isNimCard( sliceId ):
   """Return True when sliceId contains "Linecard", i.e. names a NIM slice."""
   nimMarker = "Linecard"
   return nimMarker in sliceId

def populatePhyConfig( phyConfig_, sliceId ):
   """Set the baseline attributes of the SfePhyConfigDir for a slice.

   nacPresent is only set on platforms that have a NAC, and never for NIM
   slices.
   """
   phyConfig_.isSfeSecondaryForwardingAgent = False
   nacOnThisSlice = platformHasNac() and not isNimCard( sliceId )
   if nacOnThisSlice:
      phyConfig_.nacPresent = True

   phyConfig_.hwPlatformOpenFlow = False
   hwCapability = phyConfig_.hwCapability
   hwCapability.l3SubintfSupported = True
   traceDetail( "hwCapability.l3SubintfSupported = "
                f"{phyConfig_.hwCapability.l3SubintfSupported}" )

def mapIRQ():
   """Write '1' to every smp_affinity file under /proc/irq/ ( core 0 mask ).

   Best effort: files that cannot be opened or written are skipped.
   """
   traceAlert( " _sfeInitialize: Mapping all IRQs to core 0" )
   affinityFile = 'smp_affinity'
   for irqDir, _, entries in os.walk( '/proc/irq/' ):
      if affinityFile not in entries:
         continue
      affinityPath = irqDir + '/' + affinityFile
      try:
         with open( affinityPath, 'w' ) as affinity:
            affinity.write( '1' )
      except OSError:
         # Some IRQs refuse affinity changes; just move on to the next one.
         continue

def getPciInfoUsingEthtool( devName ):
   """Return ( pciAddress, pciDriver ) for devName as reported by `ethtool -i`.

   Raises AssertionError when the ethtool output has no 'bus-info' or
   'driver' line.
   """
   output = Tac.run( [ "ethtool", "-i", devName ], stdout=Tac.CAPTURE )
   busInfo = re.search( 'bus-info: (.+)', output )
   driverInfo = re.search( 'driver: (.+)', output )

   # Validate the match objects themselves: calling group() on a failed search
   # raises AttributeError before the intended message can be reported.
   assert busInfo, "Unable to retrieve pciAddress using ethtool"
   assert driverInfo, "Unable to retrieve pciDriver using ethtool"
   pciAddress = busInfo.group( 1 )
   pciDriver = driverInfo.group( 1 )
   traceDetail( f"Device {devName} PCI {pciAddress} Driver {pciDriver}" )
   return pciAddress, pciDriver

def createTacPciAddress( platform_, pciAddress ):
   """Build an Inventory::PciAddress value from a 'domain:bus:slot.function'
   hex string and return ( iesPortId, tacPciAddress ).

   On the 'Azure' platform the string is not parsed and every field is 0.
   iesPortId is the PCI function number.
   """
   if platform_ == 'Azure':
      domain = bus = slot = function = 0
   else:
      fields = pciAddress.split( ":" )
      domain = int( fields[ 0 ], 16 )
      bus = int( fields[ 1 ], 16 )
      slotAndFunction = fields[ 2 ].split( "." )
      slot = int( slotAndFunction[ 0 ], 16 )
      function = int( slotAndFunction[ 1 ], 16 )

   tacPciAddress = Tac.Value( "Inventory::PciAddress",
                              domain=domain, bus=bus,
                              slot=slot, function=function )
   return function, tacPciAddress

def bindDeviceToDpdk( devName, pciAddress, platform_ ):
   """Hand the device at pciAddress over to the DPDK user-space driver.

   On 'Azure' the netvsc device is moved from hv_netvsc to uio_hv_generic via
   sysfs writes; each step is best effort and only traced on failure. On all
   other platforms dpdk-devbind.py binds the device to vfio-pci.
   """
   traceDetail( f"_sfeInitialize: adding device {devName} "
                f"PCI {pciAddress} to DPDK" )
   if platform_ != 'Azure':
      # We need to use '--force', otherwise the ssh route to the et1 won't
      # let it unbind from kernel driver.
      Tac.run( [ dpdkDir + "tools/dpdk-devbind.py", "--force",
                 "-b", "vfio-pci", pciAddress ] )
      return

   # Manually bind/unbind hv_netvsc device is required here.
   netUUID = "f8615163-df3e-46c5-913f-f2d2f965ed0e"
   sysPath = '/sys/bus/vmbus/drivers/'
   # ( shell command, trace emitted if the command fails ), run in order.
   steps = (
      ( f"""echo {netUUID} > {sysPath}/uio_hv_generic/new_id""",
        "_sfeInitialize: cannot add "
        f"{netUUID} to uio_hv_generic" ),
      ( f"""echo {pciAddress} > {sysPath}/hv_netvsc/unbind""",
        "_sfeInitialize: cannot unbind "
        f"{pciAddress} to hv_netvsc" ),
      ( f"""echo {pciAddress} > {sysPath}/uio_hv_generic/bind""",
        "_sfeInitialize: cannot bind "
        f"{pciAddress} to uio_hv_generic" ),
   )
   for shellCmd, failureTrace in steps:
      try:
         Tac.run( [ "bash", "-c", shellCmd ], asRoot=True )
      except Tac.SystemCommandError:
         traceDetail( failureTrace )

def _isDriverSriov( driver ):
   return driver.lower().endswith( b"vf" )

def isL3SubIntfSupported( pciDriver ):
   """Return whether L3 subinterfaces may be enabled for a port using pciDriver
   ( a bytes driver name )."""
   # Cloud-only platforms other than caravan never get subinterfaces.
   if platformCloudOnly() and not platformCaravan():
      return False

   # Non SR-IOV drivers are always eligible.
   if not _isDriverSriov( pciDriver ):
      return True

   # With subinterface enabled, we disable the stripping of the VLAN tag on
   # the physical interface.
   # Please see SfeEthIntfHelper.tin/EthPhyIntf::createNetworkPort().
   # Subinterface on an SRIOV VF interface is enabled only on the caravan
   # platform, since there is only one VF per PF and no VLAN associated with
   # the interface.
   # On non-caravan platforms there can be multiple VFs, each configured with
   # a VLAN tag. When a non-IP-tagged packet is received there and no
   # sub-interface is configured, the packet arrives with its VLAN tag intact,
   # can be punted to the kernel, and the kernel can drop it.
   return platformCaravan()

def createSfePhyCapability( pciDriver, port ):
   """Create a Hardware::Phy::SfePhyCapability for port, with
   l3SubintfSupported derived from pciDriver."""
   sfeCapability = Tac.newInstance( "Hardware::Phy::SfePhyCapability" )
   sfeCapability.l3SubintfSupported = isL3SubIntfSupported( pciDriver )
   intfId = port.intfId
   # Read the value back from the instance so the trace shows what was stored.
   stored = sfeCapability.l3SubintfSupported
   traceDetail( f"Intf {intfId} capability.l3SubintfSupported = {stored}" )
   return sfeCapability

# Extra args to the bessd required to enable inline ipsec for NAC ports
def _initBessExtraArgs( phyConfig ):
   """Write the per-phy bessd device arguments to /var/run/bessd.extraargs.

   Every phy in phyConfig.phy whose devType is known contributes one
   'dev=...,pci=...,mac=...' entry: NAC phys use dev=ipsec_iavf ( only when
   the SfeSharedNac toggle is enabled ), all other known types use dev=xl710.
   Entries are joined with ','. Nothing is written when there are no entries.
   """
   nicType = Tac.Type( "Sfe::NicType" )
   devArgs = []
   for phy in phyConfig.phy.values():
      if phy.devType == nicType.nicUnknown:
         continue
      if phy.devType == nicType.nac:
         if not toggleSfeSharedNacEnabled():
            # Skipped NAC phys contribute nothing; joining the collected
            # entries below avoids the dangling ',' that appending the
            # separator up front would leave behind.
            continue
         devKind = "ipsec_iavf"
      else:
         devKind = "xl710"
      devArgs.append( f"dev={devKind},pci={phy.pciAddress.stringValue()}"
                      ",mac=00:00:00:00:00:00" )

   iavfStr = ','.join( devArgs )
   traceDetail( f'nac port pci : {iavfStr}' )
   if iavfStr:
      with open( "/var/run/bessd.extraargs", 'w' ) as f:
         f.write( iavfStr )

def getCardInfo( nimType, nimPciBus, slot ):
   """Return the static description of a NIM card.

   nimType selects the card ( "longhorn" or "shorthorn" ), nimPciBus is the
   integer PCI bus of the slot and slot is the slot number used in interface
   names. The result has the keys "pciDevId", "vfPciDevId" and
   "pciAddrToNameMapping" ( VF PCI address -> interface name ).

   Raises KeyError for an unknown nimType.
   """
   # PCI addresses carry a two-digit hex bus ( as matched in
   # FruReadyReactor.pfCreated ), so zero-pad buses below 0x10.
   busPrefix = f"0000:{nimPciBus:02x}"
   # pciAddrToNameMapping:
   # These are the pciAddrs of the VFs once SRIOV is enabled
   # Before VFs enabled, should be {nimPciBus}:00.1, .2, etc.
   cardInfo = {
      "longhorn": {
         "pciDevId": CCommon.LONGHORN_PCI_IDS,
         "vfPciDevId": CCommon.LONGHORN_VF_PCI_IDS,
         "pciAddrToNameMapping": {
            f"{busPrefix}:01.0": f"et{slot}_1_1",
         }
      },
      "shorthorn": {
         "pciDevId": CCommon.SHORTHORN_PCI_IDS,
         "vfPciDevId": CCommon.SHORTHORN_VF_PCI_IDS,
         "pciAddrToNameMapping": {
            f"{busPrefix}:02.0": f"et{slot}_1",
            f"{busPrefix}:06.0": f"et{slot}_2",
            f"{busPrefix}:0a.0": f"et{slot}_3",
            f"{busPrefix}:0e.0": f"et{slot}_4",
         }
      }
   }

   return cardInfo[ nimType ]

# FruReadyReactor is used for NIMs in ind/cbl.
# This will react to FruReady ( which indicates that the card has been powered on )
# and is responsible for vf creation, dpdk binding, populating hw model, etc.
class FruReadyReactor( Tac.Notifiee ):
   """Reactor on hardware/slice/<sliceId> that brings up a NIM once 'FruReady'
   is present: it polls for the PFs, creates and renames the VFs, binds them
   to DPDK and populates the SfePhyConfigDir."""
   notifierTypeName = "Tac::Dir"

   def __init__( self, sysdbRoot, sliceId, phyDir, phyConfig, platform_ ):
      """Record the card description and start reacting to FruReady.

      phyDir must hold exactly one pciDevice entry, keyed by the NIM type.
      """
      traceAlert( f"FruReadyReactor init {sliceId}" )

      self._sysdbRoot = sysdbRoot
      self._sliceId = sliceId
      self._phyDir = phyDir
      self._phyConfig = phyConfig
      self._platform = platform_

      # There is some time gap between when the FruReady is set ( card is
      # powered on ) and when pfs are visible in /sys/class/net.
      # handleFruReady will be called as soon as we set FruReady ( card is
      # powered on ). If we try to create vf ( write to
      # /sys/class/net/<>/device/sriov_numvfs for pf ) then this will give error
      # because the file will not exist. As a result of this we will get
      # AssertionError in enableVfs.
      # Hence we are using clock notifiee.
      self.pfCheckTimer = Tac.ClockNotifiee()
      self.pfCheckTimer.handler = self.handleTime
      self.pfCheckTimer.timeMin = Tac.endOfTime
      # Counter used to keep track of number of tries to check for PF creation
      self.checkPfPresenceCounter = 0

      # slot number in which nim is present
      self.slot = self._phyDir.slot
      # nimType is longhorn, shorthorn, etc.
      # Materialize the keys first so indexing works whether keys() returns
      # a list or a view.
      nimTypes = list( self._phyDir.pciDevice.keys() )
      assert len( nimTypes ) == 1
      self.nimType = nimTypes[ 0 ]
      self.pciDevice = self._phyDir.pciDevice[ self.nimType ]
      # bus number for the slot in which nim is present
      self.pciBus = self.pciDevice.pciAddress.bus
      # cardInfo stores all the required information for the nim card
      self.cardInfo = getCardInfo( self.nimType, self.pciBus, self.slot )

      hwSliceDir = sysdbRoot[ 'hardware' ][ 'slice' ][ self._sliceId ]
      assert hwSliceDir is not None
      Tac.Notifiee.__init__( self, hwSliceDir )
      # FruReady may already be present; evaluate it once right away.
      self.handleFruReady( 'FruReady' )

   def getPciAddrForVf( self, devName ):
      """Return the VF PCI address mapped to interface devName, or ""."""
      for pciAddr, name in self.cardInfo[ 'pciAddrToNameMapping' ].items():
         if name == devName:
            return pciAddr
      return ""

   def getPfName( self, devName ):
      """Return what CCommon.findPfForVf reports for the VF behind devName.

      Scans the 'physfn' entries under /sys/devices for the one whose parent
      directory is the VF's PCI address. Returns "" when none matches.
      """
      physfn = Tac.run( [ "find", "/sys/devices", "-name", "physfn" ],
                        stdout=Tac.CAPTURE ).split( '\n' )
      physfn = [ fn for fn in physfn if fn ]
      vfPciAddr = self.getPciAddrForVf( devName )
      rgx = r'\/sys\/devices\/.*\/([0-9a-f:.]{12})\/physfn'
      for fn in physfn:
         res = re.search( rgx, fn )
         if not res or not res.group( 1 ):
            continue
         if res.group( 1 ) == vfPciAddr:
            return CCommon.findPfForVf( fn, vfPciAddr )
      return ""

   def createVfs( self, numPf ):
      """Enable one VF per PF on this card's bus, wait for the VF interfaces
      and rename them according to cardInfo[ 'pciAddrToNameMapping' ].

      Raises AssertionError if the number of VF PCI devices found differs
      from numPf.
      """
      CCommon.enableVfs( self.cardInfo[ 'pciDevId' ],
            self.cardInfo[ 'vfPciDevId' ],
            self.pciBus,
            numPf,
            1 ) # 1 represents the number of VFs to be created for a PF
      nimVfPciAddrs = CCommon.getVfDevices( self.pciBus,
                                          self.cardInfo[ 'vfPciDevId' ] )
      assert len( nimVfPciAddrs ) == numPf,\
            f"Found {len(nimVfPciAddrs)}, expected {numPf} "\
            f"vf pci devices on pciBus {self.pciBus:x}"
      CCommon.waitForVfIntfs( nimVfPciAddrs )
      # rename interfaces
      nameByPciAddr = self.cardInfo[ 'pciAddrToNameMapping' ]
      for device in os.listdir( CCommon.SYS_CLASS_NET ):
         vendorPath = f"{CCommon.SYS_CLASS_NET}/{device}/device/vendor"
         devicePath = f"{CCommon.SYS_CLASS_NET}/{device}/device/device"
         # search for vf interfaces that have not been renamed yet
         if re.match( r'eth\d', device ) and \
            os.path.exists( vendorPath ) and os.path.exists( devicePath ):
            pciAddr = CCommon.getPci( device )
            if pciAddr not in nameByPciAddr:
               continue
            CCommon.renameInterface( device, nameByPciAddr[ pciAddr ] )

   def pfCreated( self, pciDev, pciBus, numIntfs ):
      """Return True when exactly numIntfs 'eth<N>' interfaces on pciBus have
      the ( vendor id, device id ) pair pciDev."""
      devices = 0
      for device in os.listdir( CCommon.SYS_CLASS_NET ):
         vendorPath = f"{CCommon.SYS_CLASS_NET}/{device}/device/vendor"
         devicePath = f"{CCommon.SYS_CLASS_NET}/{device}/device/device"
         # Need to check vendor/device path exists as USB ethernet adapters may
         # include this information
         if re.match( r'eth\d', device ) and \
            os.path.exists( vendorPath ) and os.path.exists( devicePath ):
            devPci = CCommon.getPci( device )
            if not devPci.startswith( f"0000:{pciBus:02x}" ):
               continue
            with open( vendorPath, 'r' ) as f:
               vendor_id = int( f.readline(), 16 )
            with open( devicePath, 'r' ) as f:
               device_id = int( f.readline(), 16 )
            if ( vendor_id, device_id ) == pciDev:
               devices += 1
      return devices == numIntfs

   def _createSfePhys( self ):
      """Pin IRQs to core 0, bind every Fru phy to DPDK and create the
      matching Hardware::Phy::SfePhy entry in the phy config."""
      mapIRQ()
      vdevName = ''
      traceAlert( "_sfeInitialize: rebinding interfaces to DPDK drivers" )
      traceAlert(
         f"Creating Hardware::Phy::SfePhy for {list( self._phyDir.phy )}" )
      for fruPhy in self._phyDir.phy.values():
         devName = fruPhy.name
         traceDetail( f"Device {devName}" )
         pciAddress, pciDriver = getPciInfoUsingEthtool( devName )
         iesPortId, tacPciAddress = createTacPciAddress( self._platform,
                                                         pciAddress )
         # createSfePhyCapability expects the driver name as bytes.
         pciDriver = str( pciDriver ).encode( 'utf-8' )
         bindDeviceToDpdk( devName, pciAddress, self._platform )
         port = fruPhy.port
         traceDetail( f"Phy {fruPhy.name} --> {port.intfId}" )
         capability = createSfePhyCapability( pciDriver, port )
         Fru.Dep( self._phyConfig.phy, fruPhy ).newMember(
            fruPhy.name, port.intfId, MAX_SUPPORTED_MTU, port.macAddr,
            tacPciAddress, pciDriver.decode(), iesPortId, port.id - 1,
            False, vdevName, fruPhy.headerFormat, fruPhy.encapsulating,
            capability, False, self._phyDir.pciDevice[ self.nimType ].type,
            self.getPfName( devName ) )
         for speed in fruPhy.shapingReqd:
            self._phyConfig.phy[ fruPhy.name ].shapingReqd.add( speed )

   def handleTime( self ):
      """Timer callback: once the PFs exist, create the VFs and populate the
      phy config; otherwise re-arm the timer for one second later."""
      traceAlert( f"FruReadyReactor handleTime {self._sliceId}" )
      # If we have already checked PF_PRESENCE_RETRY_LIMIT times for PF
      # creation, then log error and stop.
      if self.checkPfPresenceCounter == PF_PRESENCE_RETRY_LIMIT:
         traceError( f"PF not found for {self._sliceId}" )
         return

      numPf = len( self.cardInfo[ 'pciAddrToNameMapping' ] )
      # Do vf creation and further processing only after ensuring that the PFs
      # have been created. If PFs are not created, then poll every one second.
      if not self.pfCreated( self.cardInfo[ 'pciDevId' ], self.pciBus, numPf ):
         self.pfCheckTimer.timeMin = Tac.now() + 1
         self.checkPfPresenceCounter += 1
         return

      self.createVfs( numPf )

      populatePhyConfig( self._phyConfig, self._sliceId )
      if self._phyDir.phy:
         # sfeInitialize
         if os.environ.get( 'SKIP_SFE_INITIALIZE', None ):
            traceDetail( "Skipping sfe initialization for test" )
         else:
            self._createSfePhys()
         # sfeInitialize complete

         self._phyConfig.sfeFruPluginDone = True
         traceDetail( "isSfeSecondaryForwardingAgent:"
                      f"{self._phyConfig.isSfeSecondaryForwardingAgent} "
                      f"hwPlatformOpenFlow:{self._phyConfig.hwPlatformOpenFlow} "
                      f"sfeFruPluginDone:{self._phyConfig.sfeFruPluginDone}" )

      self._phyConfig.generation = Tac.Value( "Ark::Generation",
            Fru.powerGenerationId( self._phyDir ), True )
      _initBessExtraArgs( self._phyConfig ) # TODO BUG987649

   @Tac.handler( 'entityPtr' )
   def handleFruReady( self, key ):
      """React to 'FruReady' in hardware/slice/<sliceId>/ by arming the PF
      check timer; every other key is ignored."""
      traceAlert( f"FruReadyReactor handleFruReady {self._sliceId}" )
      # We only want to react to FruReady in hardware/slice/<sliceId>/
      if not key or key != 'FruReady':
         return

      keyDir = self.notifier_.get( key )
      # Calling handleFruReady from init, we must ensure that we have FruReady
      # populated in hwSliceDir. If it is not set, then we simply bail out.
      if keyDir:
         self.pfCheckTimer.timeMin = Tac.now() + 1
      # We fall into else case when FruReady is not present in the dir
      # i.e  card is removed OR not powered on yet.
      # This case is not handled now, but will be used in future.
      else:
         pass

class SfePhyDriver( Fru.FruDriver ):
   """This Fru plugin manages any object of type Inventory::Phy::SfePhyDir."""

   requires = [ Fru.FruDriver.systemInit, Fru.FruDriver.interfaceInit ]

   managedTypeName = "Inventory::Phy::SfePhyDir"
   managedApiRe = "$"

   def __init__( self, phyDir, parentMib, parentDriver, driverCtx ):
      """Create the Hardware::Phy::SfePhyConfigDir for phyDir and populate it.

      In 'sfe_failsafe' mode on vEOS the driver returns before the base-class
      initialisation. For NIM slices ( when the NIM toggle is enabled ) the
      work is handed to a FruReadyReactor; otherwise the phys are initialised
      here via _sfeInitialize ( not skipped unless running as ceoslab ).
      """
      traceAlert( "Creating a Fru driver for the PhyDir" )
      # if Sfe is running on a hardware platform with an ASIC, e.g. 7170
      # the parentDriver will not have a veosConfig attribute
      self.isVeos = parentDriver and hasattr( parentDriver, 'veosConfig' )
      self.isCeosLab = os.environ.get( 'EOS_PLATFORM', '' ) == 'ceoslab'
      self.platform = getPlatform() or ''
      if self.isVeos \
            and parentDriver.veosConfig[ 'MODE' ] == 'sfe_failsafe':
         traceAlert( "Skipping Sfe PhyDir driver in failsafe mode" )
         return
      Fru.FruDriver.__init__( self, phyDir, parentMib, parentDriver, driverCtx )
      cellId = Fru.fruBase( phyDir ).managingCellId
      sliceId = Fru.fruBase( phyDir ).sliceId

      # The config lives under hardware/cell/<cellId> when a managing cell is
      # set, otherwise under hardware/sfe/slice/<sliceId> ( created here ).
      if cellId:
         hwConfigDir = driverCtx.sysdbRoot[ "hardware" ][ "cell" ][ str( cellId ) ]
      elif sliceId:
         sfeSliceDir = driverCtx.sysdbRoot[ "hardware" ][ "sfe" ][ "slice" ]
         sfeSliceDir.newEntity( 'Tac::Dir', sliceId )
         hwConfigDir = driverCtx.sysdbRoot[ "hardware" ][ "sfe" ][ "slice" ]\
               [ str( sliceId ) ]
      else:
         assert 0, f"{sliceId}, {cellId}, {Fru.fruBase( phyDir )}"

      self.launchConfig = driverCtx.sysdbRoot[ "hardware" ][ "sfe" ]\
               [ "launcherConfig" ]
      self.veosConfig = driverCtx.sysdbRoot[ "hardware" ][ "sfe" ][ "veosConfig" ]

      if not isNimCard( sliceId ):
         # get the qat path from config and save for
         # _sfeInitialize
         self.qatConfigPath = self.veosConfig.qatConfigPath
         self.populateVeosConfig( parentDriver )

      psDir = hwConfigDir.mkdir( "phy/sfe" )
      self.phyConfig_ = psDir.newEntity( "Hardware::Phy::SfePhyConfigDir",
                                                           "config" )

      # This is used for NIMs in ind/cbl
      # This will start a reactor for FruReady which is populated by
      # MainPowerDomainDriver in ModularSystem FruPlugin.
      # FruReadyReactor will handle vf creation, dpdk binding, populating hw model,
      # etc for NIMs.
      if toggleSfeNimSupportEnabled():
         if isNimCard( sliceId ):
            self.fruReadyReactor = FruReadyReactor( driverCtx.sysdbRoot,
                                                    sliceId,
                                                    phyDir,
                                                    self.phyConfig_,
                                                    self.platform )
            return

      populatePhyConfig( self.phyConfig_, sliceId )

      if phyDir.phy and not self.isCeosLab:
         self._deviceCache = {}
         self._dpdkDevices = []
         self._initDeviceCache()
         self._updateDeviceCache( phyDir )

         # _sfeInitialize is defined outside this view; its result is treated
         # as "there is an encapsulating phy".
         encapsulating = self._sfeInitialize( phyDir )
         if not encapsulating:
            self.launchConfig.newEntity( 'Tac::Dir', 'Sfe' )
         if encapsulating:
            self.phyConfig_.isSfeSecondaryForwardingAgent = True
            # Strata boxes generally have lower amount of memory (at least on T3)
            # Sfe is used only for NAT there, so reserving RAM at this point for
            # hugepages is undesirable. In other cases do the allocation.
            if self.phyConfig_.hwPlatformType != "Strata":
               self._allocateHugePages()
            # for now we only support one encapsulating phy and
            # it cannot be mixed with non-encapsulating phys
            assert len( phyDir.phy ) == 1
         self.phyConfig_.sfeFruPluginDone = True
         traceDetail( "isSfeSecondaryForwardingAgent:"
                      f"{self.phyConfig_.isSfeSecondaryForwardingAgent} "
                      f"hwPlatformOpenFlow:{self.phyConfig_.hwPlatformOpenFlow} "
                      f"sfeFruPluginDone:{self.phyConfig_.sfeFruPluginDone}" )

      # On ceoslab nothing is initialised above, but the plugin is still
      # reported as done.
      if self.isCeosLab:
         self.phyConfig_.sfeFruPluginDone = True

      # Set generation id based on Fru base
      self.phyConfig_.generation = Tac.Value( "Ark::Generation",
            Fru.powerGenerationId( phyDir ), True )
      if platformHasNac():
         _initBessExtraArgs( self.phyConfig_ )


   def _allocateHugePages( self ):
      """Reserve hugepages through /sys/kernel/mm/hugepages.

      Nothing is reserved when SIMULATION_VMID is set or when MemTotal is at
      most 4 GB. Otherwise one 1GB page is requested ( 64-bit only ) plus 128
      2M pages ( 640 on 32-bit ). Write failures are traced, not raised.
      """
      if os.environ.get( 'SIMULATION_VMID' ):
         traceDetail( "Skipping hw platform hugepage allocation" )
         return
      # Skip hugepage allocations for a low memory system when Sfe is running as
      # secondary forwarding agent. The presence of other forwarding agents means
      # that there will likely not be enough memory available.
      # /proc/meminfo reports MemTotal in kB, so 4 GB is 4 * 1024 * 1024.
      lowMemLimitKb = 4 * 1024 * 1024
      with open( '/proc/meminfo' ) as f:
         for line in f:
            if 'MemTotal:' in line and int( line.split()[ 1 ] ) <= lowMemLimitKb:
               traceDetail( "Skipping low mem platform hugepage allocation" )
               return
      huge1GBPath = "/sys/kernel/mm/hugepages/hugepages-1048576kB"
      huge2MPath = "/sys/kernel/mm/hugepages/hugepages-2048kB"
      thirtyTwoBit = ( platform.architecture()[ 0 ] == '32bit' )
      # We run Sfe in 32-bit mode on some mixed ASIC platforms such as the 7170.
      # In this mode having 1G huge pages causes the Sfe process to use up more of
      # its virtual memory address space because the huge page are mapped in chunks
      # of 1G into the address space. Using 2M pages is not as good for caching but
      # increase the granularity in which memory can be mapped reducing the overall
      # virtual memory usage of the Sfe process thus allowing for more route scale.
      if not thirtyTwoBit and os.path.isdir( huge1GBPath ):
         try:
            with open( huge1GBPath + "/nr_hugepages", "w" ) as memFile:
               memFile.write( "1" )
         except OSError:
            traceAlert( "Unable to allocate 1x1GB hugepages on hw platform" )
         else:
            traceDetail( "Allocated 1x1GB hugepages on running hw platform" )
      if os.path.isdir( huge2MPath ):
         num2MPages = "640" if thirtyTwoBit else "128"
         try:
            with open( huge2MPath + "/nr_hugepages", "w" ) as memFile:
               memFile.write( num2MPages )
         except OSError:
            traceAlert( f"Unable to allocate {num2MPages}x2M hugepages "
                        "on hw platform" )
         else:
            traceDetail( f"Allocated {num2MPages}x2M hugepages "
                         "on running hw platform" )

   def addDeviceInCache( self, devName, mac, pci, driver ):
      """Record devName with its mac, pci address and driver in the device
      cache; the device list gets devName only once."""
      knownDevices = self._deviceCache[ DEVICE ]
      if devName not in knownDevices:
         knownDevices.append( devName )
      for section, value in ( ( MAC, mac ), ( PCI, pci ), ( DRIVER, driver ) ):
         self._deviceCache[ section ][ devName ] = value

   def _updateDeviceCache( self, phyDir ):
      """On bare metal, when no device cache file exists yet, create it and
      fill the in-memory cache from `ethtool -i` for every phy in phyDir."""
      # On Baremetal platform, deviceCache file is not updated by Sfa Fru,
      if not platformBareMetal() or os.path.isfile( deviceCacheFile ):
         return

      # NOTE(review): the file is written before the devices below are added
      # to the in-memory cache -- confirm this ordering is intended.
      with open( deviceCacheFile, 'w' ) as cacheFile:
         json.dump( self._deviceCache, cacheFile )

      for phy in phyDir.phy.values():
         devName = phy.port.intfId.replace( "Ethernet", "et" )
         ethtoolOut = Tac.run( [ "ethtool", "-i", devName ], stdout=Tac.CAPTURE )
         pci = re.search( 'bus-info: (.+)', ethtoolOut ).group( 1 )
         driver = re.search( 'driver: (.+)', ethtoolOut ).group( 1 )
         self.addDeviceInCache( devName, phy.port.macAddr, pci, driver )

   def _initDeviceCache( self ):
      """Reset the device cache sections to their empty values."""
      emptySections = {
         FIRST_MAC: "",
         DEVICE: [],
         MAC: {},
         PCI: {},
         DRIVER: {},
         ROLE: {},
      }
      for section, emptyValue in emptySections.items():
         self._deviceCache[ section ] = emptyValue

   def _readDeviceCache( self ):
      """Load the device cache from deviceCacheFile and add the first column
      of each `dpdk-devbind.py --status` data line to self._dpdkDevices."""
      with open( deviceCacheFile ) as cacheFile:
         self._deviceCache = json.load( cacheFile )

      statusCmd = [ "/usr/share/dpdk/tools/dpdk-devbind.py", "--status",
                    "--status-dev", "net" ]
      statusLines = Tac.run( statusCmd, stdout=Tac.CAPTURE ).split( "\n" )
      # Remove headers ( first three lines ) and the final line
      for statusLine in statusLines[ 3 : -1 ]:
         pci = statusLine.split( " " )[ 0 ]
         if pci in self._dpdkDevices:
            continue
         self._dpdkDevices.append( pci )

      traceDetail( f"Device Cache = {self._deviceCache}" )
      traceDetail( f"DPDK devices = {self._dpdkDevices}" )

   def _isNacPort( self, devName ):
      """Return True when the device cache records devName with type 'NAC'."""
      if TYPE not in self._deviceCache:
         return False
      return self._deviceCache[ TYPE ].get( devName, "" ) == 'NAC'

   def _getDevType( self, devName ):
      """Return the Sfe::NicType enum value cached for devName; nicUnknown
      when the cache has no type section or no entry for the device."""
      nicType = Tac.Type( "Sfe::NicType" )
      if TYPE not in self._deviceCache:
         return nicType.nicUnknown

      cachedType = self._deviceCache[ TYPE ].get( devName, "nicUnknown" )
      # The cache stores these two upper-case; the enum names are lower-case.
      if cachedType in ( "NAC", "XL710" ):
         cachedType = cachedType.lower()
      return Tac.enumValue( nicType, cachedType )

   def _getPfName( self, devName ):
      """Return the PF name cached for VF devName, or "Unknown"."""
      if VF2PF not in self._deviceCache:
         return "Unknown"
      return self._deviceCache[ VF2PF ].get( devName, "Unknown" )

   def _getMacAddr( self, devName ):
      """Return the MAC address cached for devName, or ""."""
      if MAC not in self._deviceCache:
         return ""
      return self._deviceCache[ MAC ].get( devName, "" )

   def _getPci( self, devName ):
      """Return the cached PCI address of devName if the device is usable.

      The address is returned when it is listed in self._dpdkDevices, when
      SIMULATION_VMID is set, or when devName contains "hv_et". In every
      other case, including a cache miss, "" is returned.
      """
      if PCI in self._deviceCache:
         pciAddress = self._deviceCache[ PCI ].get( devName, "" )
      else:
         pciAddress = ""
      traceDetail( f"PCI in Cache = {pciAddress}" )

      if pciAddress == "":
         # If the cache doesn't have it return empty string
         traceDetail( f"PCI = {pciAddress} Not Found in Cache" )
         return ""

      heldByDpdk = ( pciAddress in self._dpdkDevices or
                     'SIMULATION_VMID' in os.environ )
      if heldByDpdk:
         # DPDK has this device
         traceDetail( f"PCI = {pciAddress} Found" )
         return pciAddress

      if "hv_et" in devName:
         # netvsc device is still with kernel
         return pciAddress

      # if the device is not with DPDK return empty string
      traceDetail( f"PCI = {pciAddress} Neither with kernel nor with DPDK" )
      return ""

   def _getDriver( self, devName ):
      """Return the driver name cached for devName ( "" when the cache has no
      driver section or no entry ), tracing whether it is an SR-IOV driver."""
      if DRIVER in self._deviceCache:
         pciDriver = self._deviceCache[ DRIVER ].get( devName, "" )
      else:
         pciDriver = ""
      traceDetail( f"Driver in Cache = {pciDriver}" )

      if pciDriver in ( "", None ):
         verdict = f"Driver for {devName} Not Found in Cache"
      elif _isDriverSriov( pciDriver.encode() ):
         verdict = f"Driver = {pciDriver} is SR-IOV"
      else:
         verdict = f"Driver = {pciDriver} is non SR-IOV"
      traceDetail( verdict )
      return pciDriver

   def getVeosConfig( self ):
      """Parse the key=value lines of the veos-config file ( CLOUD_BESS_CONFIG ).

      Values starting with digits become int, the literal strings "True" and
      "False" become bool, and everything else stays a string. Lines that do
      not look like key=value are ignored.
      Returns:
         A dictionary with the veosconfig/bess params"""
      parsedConfig = {}
      with open( CLOUD_BESS_CONFIG ) as configFile:
         for entry in configFile:
            found = re.match( r"(\S+)=(\d+|\S+)", entry )
            if not found:
               continue
            name, value = found.group( 1 ), found.group( 2 )
            if re.match( r"\d+", value ):
               value = int( value )
            elif value == "False":
               value = False
            elif value == "True":
               value = True
            parsedConfig[ name ] = value
      return parsedConfig

   def populateVeosConfig( self, parentDriver ):
      """Fill self.veosConfig with the Sfe/bess parameters.

      On caravan the values come from the veos-config file ( getVeosConfig ).
      On vEOS, if the config has not been updated yet, they come from
      parentDriver.veosConfig. configUpdated is set once values are written.

      Raises KeyError on caravan when "bessMemoryInMb" or "bessBuffers" is
      missing from the file, and ValueError when a boolean setting from
      parentDriver.veosConfig is not a recognised truth string.
      """
      if platformCaravan():
         mergeFinalConfig = self.getVeosConfig()
         self.veosConfig.bessMemoryMb = mergeFinalConfig[ "bessMemoryInMb" ]
         self.veosConfig.bessBuffers = mergeFinalConfig[ "bessBuffers" ]
         self.veosConfig.vrfScale = bool( mergeFinalConfig.get(
                     'vrfScaleEnabled', False ) )
         self.veosConfig.maxDefaultVrfs = int( mergeFinalConfig.get(
                    'defaultfMaxVrfs', 8 ) )
         self.veosConfig.platformRuby = bool( mergeFinalConfig.get(
                    'platformRuby', False ) )

         if platformHasNac():
            self.veosConfig.platformHasNac = True

         self.veosConfig.configUpdated = True

      if platformOcteon10():
         self.veosConfig.platformOcteon10 = True

      if self.isVeos and not self.veosConfig.configUpdated:
         traceAlert( "Setting up Sfe/bess config parameters" )
         # Update the veosConfig before Sfe starts to run bessd.
         # Launcher will not run it due to runnability...
         self.veosConfig.bessMemoryMb = int( parentDriver.veosConfig.get(
                                    'bessMemoryInMb', 0 ) )
         self.veosConfig.bessBuffers = int( parentDriver.veosConfig.get(
                                    'bessBuffers', 0 ) )
         self.veosConfig.vrfScale = bool( distutils.util.strtobool(
                    parentDriver.veosConfig.get( 'vrfScaleEnabled', 'False' ) ) )
         self.veosConfig.maxDefaultVrfs = int( parentDriver.veosConfig.get(
                    'defaultfMaxVrfs', 8 ) )
         self.veosConfig.legacyMem = bool( distutils.util.strtobool(
                    parentDriver.veosConfig.get( 'legacyMemEnabled', 'False' ) ) )
         # Parse the setting as a truth string: bool() of any non-empty
         # string, including the default 'False', is True. str() keeps a
         # value that is already a bool working.
         self.veosConfig.hyperThreading = bool( distutils.util.strtobool(
                    str( parentDriver.veosConfig.get( 'hyperThreading',
                                                      'False' ) ) ) )
         self.veosConfig.configUpdated = True

   def turnOffSpoofCheck( self, intfId ):
      """Disable spoof checking and enable trust on the VF that self.pf2vf
      maps to eth<intfId>."""
      vfId = self.pf2vf[ intfId ]
      ipCmd = [ "ip", "link", "set", f"eth{intfId}",
                "vf", f"{vfId}", "spoofchk", "off", "trust", "on" ]
      Tac.run( ipCmd, stdout=Tac.CAPTURE )

   def turnOffSpoofCheckZeroVf( self, intfId ):
      """Disable spoof checking and enable trust on VF 0 of eth<intfId>."""
      ipCmd = [ "ip", "link", "set", f"eth{intfId}",
                "vf", "0", "spoofchk", "off", "trust", "on" ]
      Tac.run( ipCmd, stdout=Tac.CAPTURE )

   def setHwModeVEPA( self, intfId ):
      """Switch eth<intfId> to the VEPA hardware bridging mode."""
      # Use the VEPA ( external switching, virtual port aggregator ) model
      # rather than the default VEB/EVB internal embedded switching model.
      # VEPA mode lets packets with srcMac == dstMac == VSI(vNIC) MAC go out.
      # We have no use-case for fast internal VM-VM(VF-VF) switching and use
      # only one VF per PF, so VEB mode is not really required.

      # Virtual Ethernet Port Aggregator(VEPA) is an IEEE EVB term: a VEPA
      # multiplexes the traffic of one or more VSIs onto a single Ethernet
      # port.
      # The biggest difference between the two is that a VEB can switch
      # packets internally between VSIs whereas a VEPA cannot; VM-to-VM
      # switching is performed by an adjacent bridge.
      bridgeCmd = [ "bridge", "link", "set", "dev", f"eth{intfId}",
                    "hwmode", "vepa" ]
      Tac.run( bridgeCmd )

   def disableFwLLDP( self, intfId ):
      """Turn on the 'disable-fw-lldp' private flag of eth<intfId>."""
      # The XL710 NIC has an embedded LLDP agent which eats the LLDP packets,
      # so the PF can't pass them up to the VFs. For LLDP to work on EOS the
      # packets must travel up to the control plane, hence the firmware LLDP
      # agent is disabled on XL710 PF ports. Note that the same is done for
      # the NAC ports by an Intel patch to the ice_swx driver
      ethtoolCmd = [ "ethtool", "--set-priv-flags", f"eth{intfId}",
                     "disable-fw-lldp", "on" ]
      Tac.run( ethtoolCmd, stdout=Tac.CAPTURE )

   def disablePFArpAndMulticast( self, intfId ):
      """Turn ARP and then multicast off on the PF interface eth<intfId>.

      By design the PF interface is kept link-up so that link up/down events
      can be handled. The VSI (Virtual Station Interface) corresponding to
      the PF therefore stays up, and ARP and multicast packet processing is
      disabled on it instead.
      """
      pfName = f"eth{intfId}"
      # Same order as before: "arp" first, then "multicast".
      for feature in ( "arp", "multicast" ):
         Tac.run( [ "ip", "link", "set", pfName, feature, "off" ],
                  stdout=Tac.CAPTURE )

   def disablePFRxtx( self, intfId ):
      """Set the `disable-pf-rxtx` private flag to on for eth<intfId>."""
      pfName = f"eth{intfId}"
      ethtoolCmd = [ "ethtool", "--set-priv-flags", pfName ]
      ethtoolCmd += [ "disable-pf-rxtx", "on" ]
      Tac.run( ethtoolCmd, stdout=Tac.CAPTURE )

   def installKernelModules( self ):
      """Load the kernel modules required for the current platform.

      The two trace messages are always emitted. When SIMULATION_VMID is
      present in the environment no module is loaded. Otherwise `bess` is
      loaded first, followed by a platform-dependent set of VFIO (and, on
      Azure, ib_uverbs / uio_hv_generic) modules.
      """
      loadingMsg = "_sfeInitialize: Loading kernel modules"
      traceAlert( loadingMsg )
      traceDetail( loadingMsg )
      if 'SIMULATION_VMID' in os.environ:
         return

      Tac.run( [ "modprobe", "-a", "bess" ] )

      needsFullVfio = ( platformCaravan() or self.platform == 'Alibaba' or
                        platformOcteon10() )
      if not needsFullVfio:
         Tac.run( [ "modprobe", "vfio-pci" ] )
         # ib_uverbs and uio_hv_generic are required only for Azure Sfe
         if self.platform == 'Azure':
            for azureModule in ( "ib_uverbs", "uio_hv_generic" ):
               Tac.run( [ "modprobe", "-a", azureModule ] )
         return

      # Caravan, Alibaba cloud and Octeon10: load vfio explicitly, with
      # options that depend on the platform, before vfio-iommu-type1 and
      # vfio-pci.
      enableSriov = 0
      if platformHasNac():
         Tac.run( [ "modprobe", "vfio", "enable_unsafe_noiommu_mode=1" ] )
         Tac.run( [ "modprobe", "ipsec_inline" ] )
         Tac.run( [ "modprobe", "ies" ] )
         # Bring adf_ctl down, then up again with the config at
         # self.qatConfigPath.
         Tac.run( [ "adf_ctl", "down" ] )
         Tac.run( [ "adf_ctl", "--config", self.qatConfigPath, "up" ] )
      elif platformOcteon10():
         Tac.run( [ "modprobe", "vfio", "enable_unsafe_noiommu_mode=0" ] )
         enableSriov = 1
      else:
         Tac.run( [ "modprobe", "vfio" ] )
      Tac.run( [ "modprobe", "vfio-iommu-type1" ] )
      Tac.run( [ "modprobe", "vfio-pci", f"enable_sriov={enableSriov}" ] )

   def populateStrataSfePhyDir( self, fruPhy, vdevName ):
      """Create the self.phyConfig_.phy entry for a Strata DMA driver phy.

      The entry is created with placeholder values (empty strings, an
      all-zero Ethernet address, an all-zero PCI address, nicUnknown) for
      the fields that are not taken from fruPhy, and a capability with
      l3SubintfSupported set. self.phyConfig_.hwPlatformType is then set to
      "Strata".

      Returns the value of fruPhy.encapsulating as read on entry.
      """
      traceDetail( "Header format: 'strataDmaDriverCpuHeader'" )
      isEncapsulating = fruPhy.encapsulating

      phyCapability = Tac.newInstance( "Hardware::Phy::SfePhyCapability" )
      phyCapability.l3SubintfSupported = True

      phyCollection = Fru.Dep( self.phyConfig_.phy, fruPhy )
      zeroMac = Tac.Value( "Arnet::EthAddr", 0, 0, 0 )
      zeroPci = Tac.Value( "Inventory::PciAddress", domain=0, bus=0, slot=0,
                           function=0 )
      phyCollection.newMember(
         fruPhy.name, '', MAX_SUPPORTED_MTU, zeroMac, zeroPci,
         '', 0, 0, False, vdevName,
         fruPhy.headerFormat, fruPhy.encapsulating, phyCapability, False,
         Tac.Type( "Sfe::NicType" ).nicUnknown, '' )

      self.phyConfig_.hwPlatformType = "Strata"
      return isEncapsulating

   def getDeviceRealPath( self, devName ):
      """Return the resolved sysfs path for a network device.

      The path resolved is /sys/class/net/<devName>/device, except when
      self.platform is 'Azure', where the final component is empty and
      /sys/class/net/<devName>/ itself is resolved.
      """
      lastComponent = "device"
      if self.platform == 'Azure':
         lastComponent = ""
      unresolved = os.path.join( "/sys/class/net", devName, lastComponent )
      return os.path.realpath( unresolved )

   def installVfioDrivers( self, pciAddress ):
      """Load the VFIO modules and force-bind pciAddress to vfio-pci.

      Module options are specified in /etc/modprobe.d/vfio.conf. The bind is
      done with dpdk-devbind.py from the tools directory under dpdkDir.
      """
      for vfioModule in ( "vfio", "vfio-iommu-type1", "vfio-pci" ):
         Tac.run( [ "modprobe", vfioModule ] )
      devbindTool = f"{dpdkDir}tools/dpdk-devbind.py"
      Tac.run( [ devbindTool, "--force", "-b", "vfio-pci", pciAddress ] )

   def setupHugePageTables( self ):
      """Create the hugepage mount points and mount hugetlbfs on them.

      /dev/hugepages1G and /dev/hugepages2M are always created. The mounts
      are skipped when SIMULATION_VMID is present in the environment. A
      failing mount (Tac.SystemCommandError) is traced and otherwise ignored.
      """
      traceAlert( "_sfeInitialize: - setting up huge pages " )
      pageSizes = ( "1G", "2M" )
      for pageSize in pageSizes:
         Tac.run( [ "mkdir", "-p", f"/dev/hugepages{pageSize}" ] )

      if 'SIMULATION_VMID' in os.environ:
         return

      for pageSize in pageSizes:
         mountCmd = [ "mount", "-t", "hugetlbfs", "-o",
                      f"pagesize={pageSize}", "none",
                      f"/dev/hugepages{pageSize}" ]
         try:
            Tac.run( mountCmd )
         except Tac.SystemCommandError:
            traceAlert( f"Fru could not mount {pageSize} hugepages..." )

   def restartBess( self ):
      """Kill any running bessd; the exit status of pkill is ignored."""
      traceDetail( "Not a hotplug scenario. Restart bess" )
      killCmd = [ "pkill", "bessd" ]
      Tac.run( killCmd, ignoreReturnCode=True )

   def _sfeInitialize( self, phyDir ):
      """Prepare the system for Sfe and publish an entry for each phy.

      In order, this method:
        1. returns early when SKIP_SFE_INITIALIZE is set in the environment,
        2. calls mapIRQ(),
        3. unless a Strata DMA driver phy is present, reads the device cache
           and, if it contains VF2PF, applies every port setting listed in
           each phy's portSettings,
        4. detects a soft restart (both hugepage directories already exist),
        5. loads kernel modules on a first start without Strata,
        6. creates one entry in self.phyConfig_.phy per phy in phyDir.phy,
           calling bindDeviceToDpdk first for devices not considered to be
           with DPDK already,
        7. sets up hugepages on a first start and kills bessd unless a
           hot-plugged interface was seen.

      Args:
         phyDir: object whose `phy` mapping is iterated; presumably keyed by
            device name with fru phy objects as values -- verify against
            the caller.

      Returns:
         False when initialization is skipped. Otherwise the local
         `encapsulating` flag: set to True for any non-Strata phy with
         fruPhy.encapsulating, and assigned the return value of
         populateStrataSfePhyDir for each Strata phy (the last assignment
         in iteration order wins).
      """
      if os.environ.get( 'SKIP_SFE_INITIALIZE', None ):
         traceDetail( "Skipping sfe initialization for test" )
         return False
      mapIRQ()
      traceAlert( "_sfeInitialize: rebinding interfaces to DPDK drivers" )
      softRestart = False
      hotPlug = False
      encapsulating = False
      vdevName = ''
      # Early check if we asked to run on Strata
      # In this case we are expecting encapsulating interfaces which
      # do not require any drivers to be initialized
      strataDmaDriver = any( ( fruPhy.headerFormat == 'strataDmaDriverCpuHeader'
         for fruPhy in phyDir.phy.values() ) )
      if not strataDmaDriver:
         self._readDeviceCache()

         if VF2PF in self._deviceCache:
            traceAlert( "_sfeInitialize: config port settings" )
            # Dispatch table: port setting name (as found in
            # phy.portSettings) -> method applying it to an interface id.
            portSettingCmds = {
               'turnOffSpoofCheck': self.turnOffSpoofCheck,
               'turnOffSpoofCheckZeroVf': self.turnOffSpoofCheckZeroVf,
               'setHwModeVEPA': self.setHwModeVEPA,
               'disablePFArpAndMulticast': self.disablePFArpAndMulticast,
               'disableFwLLDP': self.disableFwLLDP,
               'disablePFRxtx': self.disablePFRxtx,
            }
            # Build self.pf2vf: the interface ids of the NAC ports, sorted
            # (as strings), each mapped to its position in that sorted list.
            # turnOffSpoofCheck uses this position as the VF index.
            nacIntfIds = []
            for dev in self._deviceCache[ DEVICE ]:
               if self._isNacPort( dev ):
                  nacIntfId = self._getPfName( dev )
                  # The first three characters of the PF name are dropped;
                  # presumably the name has the form "eth<N>" -- TODO confirm.
                  if nacIntfId != "Unknown" and len( nacIntfId ) > 3:
                     nacIntfIds.append( nacIntfId[ 3 : ] )
            nacIntfIds.sort()
            self.pf2vf = { intfId: idx for idx, intfId in enumerate( nacIntfIds ) }

            # NOTE(review): a portSetting that is not a key of
            # portSettingCmds raises KeyError here; _getPfName( dev ) does
            # not depend on portSetting and is re-evaluated per setting.
            for dev, phy in phyDir.phy.items():
               for portSetting in phy.portSettings:
                  intfId = self._getPfName( dev )
                  if intfId != "Unknown" and len( intfId ) > 3:
                     portSettingCmds[ portSetting ]( intfId[ 3 : ] )

      if os.path.isdir( "/dev/hugepages1G" ) and os.path.isdir( "/dev/hugepages2M" ):
         # System has already been initialized implying that Fru has been restarted.
         # Don't initialize drivers, mappings and huge pages. Kill bessd if
         # it is running
         traceAlert( "_sfeInitialize: softRestart is true" )
         traceDetail( "_sfeInitialize: softRestart is true" )
         softRestart = True

      if not softRestart and not strataDmaDriver:
         self.installKernelModules()

      # Bind all non management PCI devices to the igb_uio module.
      # NOTE(review): the binding below goes through bindDeviceToDpdk; which
      # driver that helper selects is not visible here -- confirm that
      # "igb_uio" is still accurate.
      traceAlert( f"Creating Hardware::Phy::SfePhy for {list( phyDir.phy )}" )
      for fruPhy in phyDir.phy.values():
         if fruPhy.headerFormat == 'strataDmaDriverCpuHeader':
            # Strata phys need no driver handling; only the config entry.
            encapsulating = self.populateStrataSfePhyDir( fruPhy, vdevName )
            continue
         devName = fruPhy.name
         traceDetail( f"Device {devName}" )
         # Extract the PCI address in the form NN:MM.O from the device path
         # and bind this device to the DPDK poll mode driver.
         path = self.getDeviceRealPath( devName )
         dpdkDev = False
         traceDetail( f"platform = {self.platform}" )
         traceDetail( f"platformCloud = {platformCloud()}" )
         traceDetail( f"path = {path}" )
         traceDetail( f"devName = {devName}" )
         traceDetail( f"devName in path {devName in path}" )
         # Presumably the resolved path still contains devName only when the
         # sysfs entry could not be resolved to the underlying device, i.e.
         # the kernel no longer owns the interface -- TODO confirm.
         if devName in path:
            # Device might be already bound to DPDK
            pciAddress = self._getPci( devName )
            pciDriver = self._getDriver( devName )
            traceDetail( f"Device {devName} PCI {pciAddress} Driver {pciDriver}" )
            assert pciAddress, f"ERROR: Device {devName}, has no pciAddress"
            traceDetail( f"Device {devName} is with DPDK" )
            dpdkDev = True
         else:
            # Device is owned by kernel driver
            pciAddress, pciDriver = getPciInfoUsingEthtool( devName )
            # If this interface was not with DPDK and the scenario
            # is soft restart then it must be a hotplug interface
            if softRestart:
               traceDetail( f"Device {devName} is hot plugged" )
               hotPlug = True
         # On willamette platform, kernel driver probe fails
         # if certain SFPs are inserted
         # (so the device is always passed to bindDeviceToDpdk below)
         if platformWillamette():
            dpdkDev = False

         if self.platform == 'Azure':
            # On Azure the device is always (re)bound and its PCI address is
            # used as the vdev name. vdevName is not reset per iteration, so
            # it keeps the value from the latest phy processed here.
            dpdkDev = False
            vdevName = pciAddress
         iesPortId, tacPciAddress = createTacPciAddress( self.platform, pciAddress )
         # pciDriver is carried as UTF-8 bytes from here on and decoded
         # again when the phy entry is created below.
         pciDriver = str( pciDriver ).encode( 'utf-8' )

         if not dpdkDev:
            bindDeviceToDpdk( devName, pciAddress, self.platform )

         port = fruPhy.port
         # 'Phyet' are ethernet interfaces used in vEOS
         traceDetail( f"Phy {fruPhy.name} --> {port.intfId}" )

         capability = createSfePhyCapability( pciDriver, port )

         if fruPhy.encapsulating:
            encapsulating = True
            # VFIO drivers are installed only on a first start and outside
            # of simulation.
            if not softRestart and 'SIMULATION_VMID' not in os.environ:
               self.installVfioDrivers( pciAddress )
         Fru.Dep( self.phyConfig_.phy, fruPhy ).newMember(
            fruPhy.name, port.intfId, MAX_SUPPORTED_MTU, self._getMacAddr( devName ),
            tacPciAddress, pciDriver.decode(), iesPortId, port.id - 1,
            False, vdevName, fruPhy.headerFormat, fruPhy.encapsulating, capability,
            self._isNacPort( devName ), self._getDevType( devName ),
            self._getPfName( devName ) )
         # add any required shaping if present
         for speed in fruPhy.shapingReqd:
            self.phyConfig_.phy[ fruPhy.name ].shapingReqd.add( speed )

      # Hugepages are set up only on a first start; bessd is killed unless
      # a hot-plugged interface was detected above.
      if not softRestart:
         self.setupHugePageTables()
      if not hotPlug:
         self.restartBess()
      return encapsulating

def Plugin( context ):
   """Register SfePhyDriver with Fru and mount the Sfe config paths.

   Both 'hardware/sfe/launcherConfig' and 'hardware/sfe/veosConfig' are
   mounted with mode 'wi' on one mount group, which is then closed with a
   None argument.
   """
   traceDetail( "Sfe plugin registering with Fru..." )
   context.registerDriver( SfePhyDriver )
   mountGroup = context.entityManager.mountGroup()
   sfeMounts = (
      ( 'hardware/sfe/launcherConfig', 'Tac::Dir' ),
      ( 'hardware/sfe/veosConfig', 'Sfe::VeosConfig' ),
   )
   for mountPath, tacType in sfeMounts:
      mountGroup.mount( mountPath, tacType, 'wi' )
   mountGroup.close( None )
