#!/usr/bin/env python3
# Copyright (c) 2007, 2011 Arista Networks, Inc.  All rights reserved.
# Arista Networks, Inc. Confidential and Proprietary.

# pylint: disable=consider-using-f-string
# pylint: disable=redefined-outer-name

import Tac, optparse, sys, ManagedSubprocess # pylint: disable=deprecated-module

# Usage/help text passed to optparse.OptionParser further down; optparse
# replaces "%prog" with the program name when it prints this message.
usage = """%prog [options] [<command> args ...]

This program is a memory test wrapper for any executable.
It uses the edac (error detection and correction) support in the linux kernel
to check for errors before and after the executable has run.
The script exits with the following exit codes:
  0 - if there is no error
  1 - if the child dies
  2 - if the child was killed
  3 - if the requested memory controller is not found
  4 - if there are ecc errors detected during the test
  5 - if there are ecc errors detected during the test and the child died
  6 - if there are ecc errors detected during the test and the child was killed
Nothing is printed to stdout on a successful run of the subprocess with no ecc
errors.  If no argument is specified, then it simply checks for ecc errors
without running a child process
"""


#--------------------------------
# Helper functions and global state
#--------------------------------
# Directory holding one sub-directory per memory controller.  The helper
# functions below read this global at call time; it is reassigned from the
# --dir option once the command line has been parsed.
mcDir = '/sys/devices/system/edac/mc'

def eccCounters( mc ):
   """Return the ( ce_count, ue_count ) pair for memory controller `mc`.

   Each value is read from the file of the same name under `mcDir`/`mc`
   and parsed as an integer.

   Raises OSError if a counter file cannot be opened and ValueError if its
   contents are not an integer.
   """
   def readCount( name ):
      # The context manager closes the file right away instead of leaving
      # that to the garbage collector.
      with open( f"{mcDir}/{mc}/{name}" ) as counterFile:
         return int( counterFile.read() )

   # Tuple elements are evaluated left to right: ce_count first, then
   # ue_count.
   return readCount( 'ce_count' ), readCount( 'ue_count' )

def eccCounterReset( mc ):
   """Write "0" to the `reset_counters` file of memory controller `mc`.

   Raises OSError if the file under `mcDir`/`mc` cannot be opened for
   writing.
   """
   # Closing the file explicitly guarantees the write has been flushed by the
   # time this function returns, rather than whenever the file object happens
   # to be collected.
   with open( f"{mcDir}/{mc}/reset_counters", "w" ) as resetFile:
      resetFile.write( "0" )

def mcName( mc ):
   """Return the contents of the `mc_name` file for memory controller `mc`.

   The text is returned exactly as read (no stripping), so any trailing
   newline in the file is part of the result.

   Raises OSError if the file under `mcDir`/`mc` cannot be opened.
   """
   # Use a context manager so the file handle is closed immediately.
   with open( f"{mcDir}/{mc}/mc_name" ) as nameFile:
      return nameFile.read()

def checkPresent( mc ):
   """Exit the program with status 3 unless memory controller `mc` is readable.

   Presence is probed by calling mcName(); an OSError from that call is
   reported on stdout before exiting.  Returns None when the probe succeeds.
   """
   try:
      mcName( mc )
   except OSError as e:
      message = f"Unable to locate memory controller {mc}: {e}"
   else:
      # Controller found, nothing more to do.
      return
   print( message )
   sys.exit( 3 )

def background( *cmd ):
   """Start `cmd` through ManagedSubprocess.Popen and return the result.

   `cmd` is the argv tuple (program followed by its arguments), passed on
   with an empty prefix.  This function does not call wait() on the returned
   object; that is left to the caller.
   """
   return ManagedSubprocess.Popen( argv=cmd, prefix="" )

#--------------------------------
# Parse Args
#--------------------------------
# Build the command-line parser.  Positional arguments left over after option
# parsing form the child command line (program plus its arguments).
parser = optparse.OptionParser( usage=usage )
# Stop option parsing at the first positional argument so that dash-prefixed
# arguments belonging to the child command (e.g. "cmd -x") are passed through
# to it instead of being interpreted, or rejected, by this wrapper.  This
# matches the documented "[options] [<command> args ...]" form.
parser.disable_interspersed_args()
parser.add_option( "-v", "--verbose",
                   action="store_true",
                   help="Add some debugging output" )
parser.add_option( "-m", "--memctl",
                   default="mc0", metavar="MC",
                   help="Select the memory controller (default %default)" )
parser.add_option( "-r", "--reset",
                   action="store_true",
                   help="Reset ecc counters before and after test" )
parser.add_option( "-d", "--dir",
                   default='/sys/devices/system/edac/mc',
                   help="Set where to look for controller "
                        "directories (for testing, default is %default)" )
( options, args ) = parser.parse_args()

# Name of the controller under test, and the directory the helper functions
# look in (this overrides the module-level default for mcDir).
mc = options.memctl
mcDir = options.dir

#--------------------------------
# Check that the memory controller is there
#--------------------------------
# Exits with status 3 here if the controller's mc_name file cannot be read.
checkPresent( mc )

# In verbose mode, announce which controller is being tested; the text after
# the colon is whatever mcName() returns for it.
if options.verbose:
   print( f"testing memory controller {mc}: {mcName( mc )}" )

#--------------------------------
# Reset ecc counters
#--------------------------------
# Baseline against which the post-run counters are compared.  With no child
# command the baseline stays at zero (and the counters are not read here), so
# any non-zero counter is reported later on.
ce_count, ue_count = 0, 0
if args:
   ce_count, ue_count = eccCounters( mc )
   if options.reset:
      # Report what is about to be discarded, then start from a clean slate.
      if ( ce_count, ue_count ) != ( 0, 0 ):
         print( "Before reset: %s correctable and %s non-correctable errors" %
                ( ce_count, ue_count ) )
      eccCounterReset( mc )
      ce_count = ue_count = 0
      print( "Reset ecc counters." )

#--------------------------------
# Run the child and get the result
#--------------------------------
# exitCode stays 0 unless the child reports a problem: 1 for a positive
# return code, 2 for a negative one.
exitCode = 0
if args:
   child = background( *args )
   rc = child.wait()
   if rc < 0:
      print( args[ 0 ], "received a signal with error code", rc )
      exitCode = 2
   elif rc > 0:
      print( args[ 0 ], "exited with error code", rc )
      exitCode = 1
   
#--------------------------------
# Check errors again and set up the right exit code
#--------------------------------
# Read the counters again and compare them with the baseline.
ce_count2, ue_count2 = eccCounters( mc )
if ( ce_count2, ue_count2 ) != ( ce_count, ue_count ):
   # Adding 4 turns the child status 0/1/2 into the documented 4/5/6.
   exitCode += 4
   print( "Detected %s correctable and %s non-correctable errors" %
          ( ce_count2, ue_count2 ), end=' ' )
   # The increase over the baseline is only shown when a child was run;
   # otherwise just terminate the line.
   delta = f"(+{ce_count2-ce_count} and +{ue_count2-ue_count})" if args else ""
   print( delta )

#--------------------------------
# Reset counters if requested
#--------------------------------
# With --reset, clear the counters once more now that the result is known.
if options.reset:
   eccCounterReset( mc )

# Terminate with the accumulated status (see the exit codes in the usage text).
raise SystemExit( exitCode )
