#!/usr/bin/env python3
# Copyright (c) 2024 Arista Networks, Inc.  All rights reserved.
# Arista Networks, Inc. Confidential and Proprietary.
"""
sysrestest.py

This script requires root privilege.

Runs one of two tests depending on the setting of an environmental variable.

Memory Testing Using Standard Configuration
-------------------------------------------
Holds memory in three phases; each phase holds an additional 10.*% of the memory
then sleeps for 62 seconds to give the agent a chance to see its memory growth
and tweet a memholder Canary with the percentage of memory, sizes, state, etc.,
so we can verify it tracks memory usage.

We expect to see one Threshold type record on a 4GiB DUT, and one Threshold
and at least one Increment type record on larger DUTs; these will be
Type: T and Type: I.

Memory Testing Using a reloaded and altered Configuration File for retweets
---------------------------------------------------------------------------
Reloads the configuration file SysResMonitor.conf with different settings
from the default configuration file:

Name=ProcessMemory,
Type=ProcScan,
Frequency=30,
ThresholdMiB=2000,
ThresholdPct=10,
IncrementPct=5,
Refresh=120

A second reload with another alternate configuration file SysResMonitor.conf
with different settings from the previous altered configuration file:

Name=ProcessMemory,
Type=ProcScan,
Frequency=30,
ThresholdMiB=1000,
ThresholdPct=10,
IncrementPct=2,
Refresh=120

This configuration changes all the input parameters so it is a good test
that the configuration processing daemon code is working correctly.
This tests faster frequency, lower Thresholds and Increment values
as well as a very fast refresh so we can test that retweets occur.

On x86_64 switches (8 GiB and up) the test will generate a Threshold and
an Increment Canary and then sleep for some time to allow several retweet
Canaries to be emitted and recognized. This is possible due to the adjusted
configuration SysResMonitor.conf file described above.

For i386 switches that are limited to 32 bit address space we just expect
a Threshold Canary though smaller switches may also emit an Incremental
Canary. The important issue here is the sleep with memory held to allow
us to recognize several retweet Canaries. This is again possible due to
the adjusted configuration SysResMonitor.conf file described above.

This also tests reloading the configuration file to pick up the adjusted
values we want for testing.

Written for Python Version: 3.9.17
Date:   2024/03/04
Author: echron@arista.com
"""
import math
import multiprocessing
import os
import signal
import subprocess
from subprocess import Popen, PIPE
import sys
import time
from typing import List, Optional, Tuple

# Name of the agent under test; used for the pidof lookup and as the
# systemctl service name when reloading.
AGENT_NAME = "SysResMonitor"

def memsizeMiBs() -> int:
   """
   Get Memory Size in MiB

   Runs 'free -m' and reads the 'total' column from the line that follows
   the header line.

   Returns the total memory in MiB, or 0 when the command cannot be run,
   exits non-zero, or prints output that cannot be parsed. Every failure is
   reported on stdout.
   """
   cmd = [ 'free', '-m' ]
   try:
      res = subprocess.run( cmd, capture_output=True, text=True, check=True )
   except ( OSError, subprocess.CalledProcessError ) as err:
      # OSError covers the 'free' binary being missing or not executable
      print( f"Error running cmd: 'free -m': {err}" )
      return 0

   try:
      outlines = res.stdout.splitlines()
      header = outlines[ 0 ].split()
      values = outlines[ 1 ].split()
      # The data line carries a leading row label that the header line does
      # not have, so each value sits one position right of its column name.
      totalMiBs = int( values[ header.index( 'total' ) + 1 ] )
   except ( IndexError, ValueError ) as err:
      print( f"Error parsing output of cmd: 'free -m': {err}" )
      return 0

   print( f"Total Memory: {totalMiBs} MiB" )
   return totalMiBs

def allocateMemMiB():
   """
   Build one MiB worth of zero-filled buffers.

   Returns a list of 256 separate bytearray objects, each 4096 bytes long
   ( 256 * 4096 = 1 MiB of payload ).
   """
   chunkCount = 256
   zeroedPage = [ 0 ] * 4096
   return [ bytearray( zeroedPage ) for _ in range( chunkCount ) ]

def allocMemAndHold( mibs: int, sleepsecs ):
   """
   Allocate the requested number of MiB, then sleep while holding them.

   mibs: number of allocateMemMiB() allocations to make
   sleepsecs: seconds to sleep once everything has been allocated

   Returns the list of allocations so the caller can keep them alive.
   """
   held = [ allocateMemMiB() for _ in range( mibs ) ]

   print( f"Sleeping for: {sleepsecs} sec" )
   time.sleep( sleepsecs )

   return held

def allocmem( pct: int, sleepsecs: int ) -> List[ list ]:
   """
   Drive memory allocation for 32 bit and 64 bit processes

   pct: percentage of total memory to allocate in each run. A value of 0
      selects a fixed size instead: 525 MiB when total memory is below
      4096 MiB, otherwise 1700 MiB.
   sleepsecs: seconds to sleep after each run's allocation

   Two runs are made and every allocation is kept, so twice the per-run
   size is held by the time this returns.

   Returns a list with the result of each allocMemAndHold() run. The memory
   stays held only as long as the caller keeps a reference to it.
   """
   runs = 2
   starttime = time.time()
   pid = os.getpid()

   totalmem = memsizeMiBs()
   print( f"64-Bit Pid: {pid} Total Memory Size: {totalmem} MiB" )

   if pct == 0:
      testmibs = 525 if totalmem < 4096 else 1700
      # memsizeMiBs() reports 0 when it cannot determine the size; report
      # 0% in that case rather than dividing by zero.
      pct = ( testmibs / totalmem ) * 100 if totalmem else 0.0
   else:
      testmibs = math.ceil( totalmem * 0.01 ) * pct
   print( f"Allocating {pct:.1f}% of Total Memory Size: { testmibs } MiB" )

   tarrbytes = []
   ssize = 0
   for _ in range( runs ):
      arrbytes = allocMemAndHold( testmibs, sleepsecs )
      tarrbytes.append( arrbytes )
      # Payload bytes requested plus the shallow size of the outer list
      ssize += testmibs * 1048576 + sys.getsizeof( arrbytes )

   elapsed = time.time() - starttime
   print( "Done allocating memory and sleeping, " )
   print( f"Size of strings: {ssize} bytes" )
   print( f"Run Time: {elapsed:.3f} secs" )

   return tarrbytes

def sendSignal( pid: str, signum: int ) -> None:
   """
   Deliver a signal to a process.

   pid: process id, converted with int() before use
   signum: signal number to deliver, e.g. signal.SIGHUP
   """
   targetPid = int( pid )
   os.kill( targetPid, signum )

def createAltConfFile( filePath: str, cflines: List[ str ] ) -> None:
   """
   Write the given configuration lines to filePath, one per line.

   filePath: file to create; an existing file is overwritten
   cflines: configuration lines, written in order with a newline appended
      to each
   """
   print( f"cflines: {cflines}" )

   with open( filePath, "w" ) as confFile:
      confFile.writelines( entry + "\n" for entry in cflines )

   print( f"Created file: {filePath}" )

def removeAltConfFile(
      filePath: str = "/mnt/flash/persist/SysResMonitor.conf" ) -> None:
   """
   Remove the conf file to clean up

   filePath: file to remove; defaults to the alternate SysResMonitor
      configuration file location.

   A file that is already absent counts as success. Any other failure is
   reported on stdout and not raised.
   """
   # Attempt the removal directly instead of checking for existence first,
   # so the file vanishing between check and removal cannot cause an error.
   try:
      os.remove( filePath )
   except FileNotFoundError:
      print( f"File: {filePath} does not exist, removal complete" )
   except OSError as err:
      print( f"Error: remove {filePath} failed error: {err}" )
   else:
      print( "File removed successfully" )

def checkRootPrivilege() -> bool:
   """
   Return True if this script is running with root privilege, i.e. the
   effective user id is 0.
   """
   return os.geteuid() == 0

def pidFromName( name: str ) -> Tuple[ bool, str ]:
   """
   Return pid from name, expecting just one pid and otherwise aborts

   Runs 'pidof <name>'.

   Returns ( True, pid ) with the pid as a string when exactly one process
   is found, and ( False, "0" ) when pidof exits non-zero or cannot be run.

   Raises AssertionError when pidof succeeds but does not report exactly
   one pid.
   """
   cmd = [ "pidof", name ]
   try:
      # run() reads the pipes while waiting for the child. Calling wait() on
      # a Popen that has stdout=PIPE before reading its output can deadlock
      # once the pipe buffer fills.
      res = subprocess.run( cmd, capture_output=True, text=True, check=False )
   except OSError as err:
      print( f"Error running cmd: {cmd}: {err}" )
      return False, "0"

   if res.returncode != 0:
      return False, "0"

   spids = res.stdout.split()
   if len( spids ) != 1:
      # Raised explicitly so the check is not stripped by 'python -O'
      raise AssertionError( f"Expected one pid for {name}, found: {spids}" )
   return True, spids[ 0 ]

def verifyAgentRunning() -> str:
   """
   Look up the agent's pid, ending the program if the agent is not found.

   Returns the pid string reported by pidFromName(). When the agent is not
   found a notice is printed and the program exits with status 1.
   """
   found, agentPid = pidFromName( AGENT_NAME )
   if found:
      return agentPid
   print( "No action taken: Agent is not currently running" )
   sys.exit( 1 )

def reloadSysResMonitor() -> None:
   """
   Ask the agent to reload so it picks up the current config file.
   Requires root privilege.

   Tries 'systemctl reload' first; if that command fails, SIGHUP is sent
   directly to the agent process instead.
   """
   # Resolved up front: exits the program if the agent is not running
   agentPid = verifyAgentRunning()
   reloadCmd = [ "systemctl", "reload", AGENT_NAME ]

   try:
      subprocess.run( reloadCmd, check=True )
   except subprocess.CalledProcessError as err:
      print( f"Failed to reload Service: {AGENT_NAME} error: {err}" )
      sendSignal( agentPid, signal.SIGHUP )
   else:
      print( f"Successful reload of Service: {AGENT_NAME}" )

def procSize() -> str:
   """
   Report the word size given by 'getconf LONG_BIT' as "32" or "64".

   Falls back to "32" when the getconf command exits non-zero. The value
   is printed before being returned.
   """
   cmd = [ 'getconf', 'LONG_BIT' ]
   processSize = "32"
   try:
      res = subprocess.run( cmd, capture_output=True, text=True, check=True )
   except subprocess.CalledProcessError as err:
      print( f"Error running cmd: {cmd}: {err}" )
   else:
      # Only the first line of output is significant
      processSize = res.stdout.splitlines()[ 0 ]
      assert processSize in ( '32', '64' )

   print( f"Process size: {processSize}" )
   return processSize

def displaySettings( pid: str ) -> None:
   """
   Signal the agent with SIGUSR1 followed by SIGUSR2; used after a reload to
   get the agent to display its configuration settings.

   pid: process id of the agent
   """
   for signum in ( signal.SIGUSR1, signal.SIGUSR2 ):
      sendSignal( pid, signum )

def runStdConfiguration() -> None:
   """
   Put the agent back on its standard configuration: delete the alternate
   conf file, reload the agent, then have it display its settings.
   """
   # Looked up before anything is changed; exits if the agent is not running
   agentPid = verifyAgentRunning()

   removeAltConfFile()
   reloadSysResMonitor()

   displaySettings( agentPid )

def runAltConfiguration( configuration ) -> None:
   """
   Install the given configuration lines as the alternate conf file and
   reload the agent so it picks them up.

   configuration: list of configuration lines to write
   """
   confPath = "/mnt/flash/persist/SysResMonitor.conf"

   # Clear out any earlier alternate file before writing the new one
   removeAltConfFile()
   createAltConfFile( confPath, configuration )

   reloadSysResMonitor()

def _heldBytes( obj ) -> int:
   """
   Return the total payload bytes of the bytearrays nested inside lists.
   """
   if isinstance( obj, ( bytes, bytearray ) ):
      return len( obj )
   if isinstance( obj, ( list, tuple ) ):
      return sum( _heldBytes( item ) for item in obj )
   return 0

def configtest( **kwargs ) -> None:
   """
   Run a configuration test

   Keyword arguments:
   pct: percentage of memory passed to allocmem() for a 64-bit process; a
      32-bit process always passes 0, which selects allocmem()'s fixed size
   sleepsecs: seconds allocmem() sleeps after each allocation run
   extrasleep: optional additional seconds to sleep while the memory is
      still held; treated as 0 when absent

   After the allocations and any extra sleep, a further 15 second sleep is
   taken with the memory still held. The number of bytes held is printed on
   the way out, including when an exception ends the test early.
   """
   pct = kwargs.get( 'pct' )
   sleepsecs = kwargs.get( 'sleepsecs' )
   # A missing extrasleep would otherwise fail the '> 0' comparison below
   extrasleep = kwargs.get( 'extrasleep' ) or 0
   tarrbytes = []
   try:
      processSize = procSize()
      if processSize == '32':
         arrbytes = allocmem( pct=0, sleepsecs=sleepsecs )
         tarrbytes.append( arrbytes )
      elif processSize == '64':
         arrbytes = allocmem( pct, sleepsecs )
         tarrbytes.append( arrbytes )
      else:
         # Raised explicitly so the check is not stripped by 'python -O'
         raise AssertionError( "Error: Invalid Process Space Size" )
      if extrasleep > 0:
         print( f"Sleeping for: {extrasleep} sec" )
         time.sleep( extrasleep )
      sleepsecs = 15
      print( f"Sleeping for: {sleepsecs} sec" )
      time.sleep( sleepsecs )
   finally:
      # sys.getsizeof() is shallow and would only measure the outer list
      # object, so add up the buffers that are actually being held.
      totalSize = _heldBytes( tarrbytes )
      print( f"Standard config, Size memory held: {totalSize} bytes {pct}%" )

def oomScoreAdj() -> Optional[ int ]:
   """
   Return our current oom_score_adj value

   Reads the first line of /proc/self/oom_score_adj.

   Returns the value as an int, or None when the file cannot be read or
   its content is not an integer; the failure is reported on stdout.
   """
   procPath = "/proc/self/oom_score_adj"
   try:
      with open( procPath, 'r' ) as pFile:
         return int( pFile.readline().strip() )
   except ( OSError, ValueError ) as err:
      print( f"Error: Failed to read: {procPath} error: {err}" )
      return None

def oomScoreAdjIs( score: str ) -> None:
   """
   Set our oom_score_adj to the desired value.
   Positive values do not require root privilege as they make it more likely
   that we'll be selected for OOM Kill.

   score: desired value, normally a numeric string such as "950"

   The value is read back afterwards; a message is printed when it does not
   match what was requested. Nothing is raised on failure.
   """
   procPath = "/proc/self/oom_score_adj"
   try:
      with open( procPath, 'w' ) as pFile:
         pFile.write( str( score ) )
   except OSError as err:
      print( f"Error writing to {procPath}: {err}" )

   # oomScoreAdj() returns an int (or None on failure), so compare numbers;
   # comparing it with the score string would never match.
   try:
      wanted = int( score )
   except ( TypeError, ValueError ):
      wanted = None

   oomAdj = oomScoreAdj()
   if wanted is None or wanted != oomAdj:
      print( f"Failed setting oom_score_adj to: {score}" )
      print( f"Current oom_score_adj is: {oomAdj}" )

def _procScanConf( thresholdMiB: int, incrementPct: int ) -> List[ str ]:
   """
   Return the alternate ProcessMemory conf lines for the given threshold
   and increment; the remaining settings are the same for every test phase.
   """
   return [
      "Name=ProcessMemory,",
      "Type=ProcScan,",
      "Frequency=30,",
      f"ThresholdMiB={thresholdMiB},",
      "ThresholdPct=10,",
      f"IncrementPct={incrementPct},",
      "Refresh=120"
   ]

def _runConfigTest( pct: int, sleepsecs: int, extrasleep: int ) -> None:
   """
   Run configtest() in a child process and wait for it to finish.
   """
   kargs = { 'pct': pct, 'sleepsecs': sleepsecs, 'extrasleep': extrasleep }
   proc = multiprocessing.Process( target=configtest, kwargs=kargs )
   proc.start()
   proc.join()
   # A negative exit code means the child was terminated by that signal
   # number, which is worth knowing given the raised oom_score_adj.
   if proc.exitcode != 0:
      print( f"Warning: configtest process exit code: {proc.exitcode}" )

def memoryTest() -> None:
   """
   Drive memholder testing, we also set the oom_score_adj to become a target
   should an OOM event occur while we're testing.

   Requires root privilege; without it an error is printed and nothing is
   run. Three phases follow, each holding memory in a separate child
   process:
     1. first alternate configuration, with a long extra sleep
     2. second alternate configuration
     3. standard configuration
   """
   starttime = time.time()

   if not checkRootPrivilege():
      print( "Error: Program is not running with root privilege" )
      return
   print( "Program is running with root privilege" )

   oomScoreAdjIs( "950" )
   pid = verifyAgentRunning()

   # Phase 1: reload with the first modified config
   runAltConfiguration( _procScanConf( thresholdMiB=2000, incrementPct=5 ) )
   sendSignal( pid, signal.SIGUSR1 )
   _runConfigTest( pct=10, sleepsecs=50, extrasleep=365 )

   # Phase 2: reload with the second modified config
   runAltConfiguration( _procScanConf( thresholdMiB=1000, incrementPct=2 ) )
   _runConfigTest( pct=10, sleepsecs=50, extrasleep=0 )
   sendSignal( pid, signal.SIGUSR2 )

   # Phase 3: reload again to return to the default config
   runStdConfiguration()
   _runConfigTest( pct=20, sleepsecs=65, extrasleep=0 )

   elapsed = time.time() - starttime
   print( f"Run Time: {elapsed:.3f} secs" )

def main() -> None:
   """
   Entry point of the product test: hold enough memory for a CANARY to be
   generated, by running memoryTest().
   """
   memoryTest()

# Run the test only when executed as a script, not when imported
if __name__ == "__main__":
   main()
