#!/usr/bin/env python3
# Copyright (c) 2007, 2011 Arista Networks, Inc.  All rights reserved.
# Arista Networks, Inc. Confidential and Proprietary.

import optparse, sys, ManagedSubprocess # pylint: disable=deprecated-module

# Help text handed to optparse.OptionParser; optparse expands "%prog" to the
# name of the running program. Keep the exit-code table in sync with the
# exitCode handling in the script body below.
usage = """%prog [options] [<command> args ...]

This program is a memory test wrapper for any executable on the Raven Cpu platform.
It uses the ecc error counter in the Raven hardware to detect memory errors and
returns the following codes.
  0 - if there is no error
  1 - if the child dies
  2 - if the child was killed
  4 - if there are ecc errors detected during the test
  5 - if there are ecc errors detected during the test and the child died
  6 - if there are ecc errors detected during the test and the child was killed

Code 4 is also returned, without running the child, if the lifetime error counter
has already reached its maximum value.

Nothing is printed to stdout on a successful run of the subprocess with no new ecc
errors.  If no argument is specified, then it simply checks for any logged ecc errors
without running a child process, prints the total number of lifetime logged memory
errors and returns 0 as the exit code.
"""

# Helper functions and global state
def readInt( file, offset, length ):
   """Read a little-endian unsigned integer from a seekable file object.

   Args:
      file: seekable file object, normally opened in binary mode.
      offset: absolute position passed to file.seek() before reading.
      length: number of bytes that make up the integer.

   Returns:
      The `length` bytes found at `offset`, interpreted least-significant
      byte first, as a non-negative int (0 when `length` is 0).

   Raises:
      EOFError: if fewer than `length` bytes could be read.
   """
   file.seek( offset )
   raw = file.read( length )
   if isinstance( raw, str ):
      # Text-mode file objects return str; map each character to the byte
      # with the same code point so such callers keep working.
      raw = raw.encode( "latin-1" )
   if len( raw ) < length:
      raise EOFError( "expected %d bytes at offset %d, got %d"
                      % ( length, offset, len( raw ) ) )
   # Indexing bytes yields ints on Python 3, so the former
   # ord( raw[ i ] ) fold raised TypeError for binary files.
   # int.from_bytes performs the same little-endian accumulation.
   return int.from_bytes( raw, "little" )

def getErrorReg( reg ):
   """Return a 4-byte register from the PCI config space of 0000:00:18.3.

   Args:
      reg: byte offset of the register within the sysfs `config` file.

   Returns:
      The int produced by readInt() for the 4 bytes starting at `reg`.

   Raises:
      OSError: if the sysfs config file cannot be opened or read.
   """
   # Use a context manager so the handle is closed on every call instead of
   # being left for the garbage collector (this function is called repeatedly).
   with open( "/sys/bus/pci/devices/0000:00:18.3/config", "rb" ) as f:
      return readInt( f, reg, 4 )

def ravenCheckErrCount( ):
   """Return the ECC error count stored in the low 12 bits of register 0x160.

   The register is read once; the control bits are validated and the count
   is extracted from that same value, so the returned count always belongs
   to the snapshot that passed the checks.

   Returns:
      An int in the range 0..0xfff.

   Raises:
      AssertionError: if bit 31 or bit 30 of the register is clear, or bit 29
         is set, i.e. the counter does not look enabled.
   """
   errReg = getErrorReg( 0x160 )
   # check that the counter is enabled
   # Explicit raises are used instead of assert statements so the hardware
   # sanity checks are not stripped when Python runs with -O. The exception
   # type is kept as AssertionError for compatibility.
   if not errReg & ( 1 << 31 ):
      raise AssertionError(
         "error register 0x160 = 0x%08x: expected bit 31 set" % errReg )
   if not errReg & ( 1 << 30 ):
      raise AssertionError(
         "error register 0x160 = 0x%08x: expected bit 30 set" % errReg )
   if errReg & ( 1 << 29 ):
      raise AssertionError(
         "error register 0x160 = 0x%08x: expected bit 29 clear" % errReg )

   return errReg & 0xfff

def background( *cmd ):
   """Start `cmd` via ManagedSubprocess.Popen and return the process handle.

   Args:
      *cmd: the command and its arguments, forwarded as the `argv` tuple.

   Returns:
      The object returned by ManagedSubprocess.Popen, created with an empty
      `prefix`.
   """
   return ManagedSubprocess.Popen( argv=cmd, prefix="" )

# Command-line handling: everything left over after the options is the
# command to run as the child process.
parser = optparse.OptionParser( usage=usage )
parser.add_option( "-v", "--verbose", action="store_true",
                   help="Add some debugging output" )
( options, args ) = parser.parse_args()

# Snapshot the error counter before doing anything else. A reading of 0xFFF
# is reported as maxed out and ends the run immediately.
startingErrors = ravenCheckErrCount()
if startingErrors == 0xFFF:
   print( 'This memory has maxed out its lifetime error count. It is probably bad.' )
   sys.exit( 4 )

exitCode = 0

# No command given: just report the lifetime count and stop.
if not args:
   print( 'Total errors detected in memory lifetime: ', startingErrors )
   sys.exit( exitCode )

# Run the command and translate its wait() result into the base exit code:
# a negative result is reported as a signal (2), a positive one as an error
# exit (1), and zero leaves exitCode untouched.
child = background( *args )
rc = child.wait()
if rc < 0:
   print( args[ 0 ], "received a signal with error code", rc )
   exitCode = 2
elif rc > 0:
   print( args[ 0 ], "exited with error code", rc )
   exitCode = 1

# Compare the counter against the initial snapshot; any change adds 4 to the
# exit code and is reported on stdout.
endingErrors = ravenCheckErrCount()
if endingErrors != startingErrors:
   newErrs = endingErrors - startingErrors
   exitCode += 4
   # pylint: disable-next=consider-using-f-string
   print( "Detected %s new memory errors after executing" % newErrs )
sys.exit( exitCode )
