# Copyright (c) 2013 Arista Networks, Inc.  All rights reserved.
# Arista Networks, Inc. Confidential and Proprietary.

import functools
import struct

import Tac
from HadoopRpc import Boolean
from HadoopRpc import ensure
from HadoopRpc import HadoopRpcClient
from HadoopRpc import Integer
from HadoopRpc import Long
from HadoopRpc import ProtoBufWithWritableEngine
from HadoopRpc import String
from HadoopRpc import Writable
from HadoopRpc import WritableRpc

# Exclusive upper bound on the element count of an array read off the wire:
# we'll refuse to de-serialize any array bigger than this.
MAX_ARRAY_SIZE = 1024 * 1024

class JobTrackerClient( HadoopRpcClient ):
   """Hadoop RPC client for talking to the JobTracker.

   Every public method builds a JobTrackerClient.Rpc for one remote call and
   passes it to _sendRpc() along with a second argument (a class such as
   ClusterStatus, or int) that presumably tells _sendRpc() how to decode the
   reply -- confirm against HadoopRpcClient.
   """

   class Rpc( WritableRpc ):
      """An RPC addressed to the JobTracker."""

      PROTOCOL = "org.apache.hadoop.mapred.JobSubmissionProtocol"
      CLIENT_VERSION = 28

   def getProtocolVersion( self ):
      """Sends getProtocolVersion with our protocol name and client version."""
      rpcCls = JobTrackerClient.Rpc
      protocol = String( rpcCls.PROTOCOL )
      version = Long( rpcCls.CLIENT_VERSION )
      request = rpcCls( "getProtocolVersion", protocol, version )
      return self._sendRpc( request, int )

   def getClusterStatus( self, detailed=False ):
      """Sends getClusterStatus; `detailed` is forwarded as a Boolean."""
      request = JobTrackerClient.Rpc( "getClusterStatus", Boolean( detailed ) )
      return self._sendRpc( request, ClusterStatus )

   def getQueues( self ):
      """Sends getQueues (reply type: JobQueueInfo)."""
      request = JobTrackerClient.Rpc( "getQueues" )
      return self._sendRpc( request, JobQueueInfo )

   def getAllJobs( self ):
      """Sends getAllJobs (reply type: JobStatus)."""
      request = JobTrackerClient.Rpc( "getAllJobs" )
      return self._sendRpc( request, JobStatus )

   def jobsToComplete( self ):
      """Sends jobsToComplete (reply type: JobStatus)."""
      request = JobTrackerClient.Rpc( "jobsToComplete" )
      return self._sendRpc( request, JobStatus )

   def getJobStatus( self, jobid ):
      """Sends getJobStatus for the given JobId (reply type: JobStatus)."""
      assert isinstance( jobid, JobId ), f"Invalid Job ID: {jobid!r}"
      request = JobTrackerClient.Rpc( "getJobStatus", jobid )
      return self._sendRpc( request, JobStatus )

   def getJobProfile( self, jobid ):
      """Sends getJobProfile for the given JobId (reply type: JobProfile)."""
      assert isinstance( jobid, JobId ), f"Invalid Job ID: {jobid!r}"
      request = JobTrackerClient.Rpc( "getJobProfile", jobid )
      return self._sendRpc( request, JobProfile )

   def getJobCounters( self, jobid ):
      """Sends getJobCounters for the given JobId (reply type: Counters)."""
      assert isinstance( jobid, JobId ), f"Invalid Job ID: {jobid!r}"
      request = JobTrackerClient.Rpc( "getJobCounters", jobid )
      return self._sendRpc( request, Counters )

   def getMapTaskReports( self, jobid ):
      """Sends getMapTaskReports for the given JobId (reply type: TaskReport)."""
      assert isinstance( jobid, JobId ), f"Invalid Job ID: {jobid!r}"
      request = JobTrackerClient.Rpc( "getMapTaskReports", jobid )
      return self._sendRpc( request, TaskReport )

   def getReduceTaskReports( self, jobid ):
      """Sends getReduceTaskReports for the given JobId (reply: TaskReport)."""
      assert isinstance( jobid, JobId ), f"Invalid Job ID: {jobid!r}"
      request = JobTrackerClient.Rpc( "getReduceTaskReports", jobid )
      return self._sendRpc( request, TaskReport )

   def getTaskCompletionEvents( self, jobid,
                                fromEventId=0, maxEvents=int( 2**31 - 1 ) ):
      """Sends getTaskCompletionEvents (reply type: TaskCompletionEvent).

      Args:
        jobid: The JobId whose events are requested.
        fromEventId: Forwarded to the JobTracker as an Integer.
        maxEvents: Forwarded to the JobTracker as an Integer; must be > 0.
      """
      assert isinstance( jobid, JobId ), f"Invalid Job ID: {jobid!r}"
      assert maxEvents > 0, f"Invalid argument: {maxEvents!r}"
      firstEvent = Integer( fromEventId )
      eventLimit = Integer( maxEvents )
      request = JobTrackerClient.Rpc( "getTaskCompletionEvents",
                                      jobid, firstEvent, eventLimit )
      return self._sendRpc( request, TaskCompletionEvent )

   def getCleanupTaskReports( self, jobid ):
      """Sends getCleanupTaskReports for the given JobId (reply: TaskReport)."""
      assert isinstance( jobid, JobId ), f"Invalid Job ID: {jobid!r}"
      request = JobTrackerClient.Rpc( "getCleanupTaskReports", jobid )
      return self._sendRpc( request, TaskReport )

   def getSetupTaskReports( self, jobid ):
      """Sends getSetupTaskReports for the given JobId (reply: TaskReport)."""
      assert isinstance( jobid, JobId ), f"Invalid Job ID: {jobid!r}"
      request = JobTrackerClient.Rpc( "getSetupTaskReports", jobid )
      return self._sendRpc( request, TaskReport )

   def getFileSystemName( self ):
      """Sends getFilesystemName (reply type: String)."""
      # Note that the remote method name is spelled with a lower-case 's'.
      request = JobTrackerClient.Rpc( "getFilesystemName" )
      return self._sendRpc( request, String )

   def getSystemDir( self ):
      """Sends getSystemDir (reply type: String)."""
      request = JobTrackerClient.Rpc( "getSystemDir" )
      return self._sendRpc( request, String )

@functools.total_ordering
class JobId( Writable ):
   """Identifies a job by its JobTracker ID (a string) and its job number.

   Instances are hashable and ordered by the pair ( jtid, jobid ).  Comparing
   against anything that is not a JobId yields NotImplemented, so `==`
   evaluates to False and ordering operators raise TypeError.
   """
   HADOOP_CLASS_NAME = "org.apache.hadoop.mapred.JobID"
   __slots__ = [ 'jtid', 'jobid' ]

   def __init__( self, jtid, jobid ):
      """The JobTracker ID and Job ID."""
      super().__init__()
      self.jtid = jtid
      self.jobid = jobid

   def __eq__( self, other ):
      # Defer to the other operand rather than asserting: an assert would
      # blow up on e.g. `jobid == None` and vanishes entirely under -O.
      if not isinstance( other, JobId ):
         return NotImplemented
      return ( self.jtid, self.jobid ) == ( other.jtid, other.jobid )

   def __lt__( self, other ):
      if not isinstance( other, JobId ):
         return NotImplemented
      return ( self.jtid, self.jobid ) < ( other.jtid, other.jobid )

   def __hash__( self ):
      return hash( ( self.jtid, self.jobid ) )

   def serialize( self ):
      """Returns the wire form: job number, length byte, encoded JTID.

      Raises:
        ValueError: if the encoded JTID does not fit in the one-byte length.
      """
      # The length prefix must count encoded bytes, not characters, or a
      # non-ASCII JTID would be framed with the wrong size.
      jtid = self.jtid.encode()
      # The length is written as a single byte (see readFrom(), which treats
      # it as a one-byte vint); anything above 127 would be mis-framed.
      if len( jtid ) > 127:
         raise ValueError( "JTID too long to serialize: %d bytes" % len( jtid ) )
      return struct.pack( ">IB", self.jobid, len( jtid ) ) + jtid

   @classmethod
   def readFrom( cls, sock ):
      """Reads the job number, a one-byte length and the JTID from sock."""
      jobid = sock.readInt()
      length = ord( sock.recvall( 1 ) )  # vint but we know size 1
      ensure( 0 < length < 127, "Invalid JTID length: %d" % length )
      jtid = sock.recvall( length ).decode()
      return JobId( jtid, jobid )


class JobACL( Writable ):
   """Stand-in for Hadoop's JobACL; neither direction of (de)serialization
   is implemented."""
   HADOOP_CLASS_NAME = "org.apache.hadoop.mapreduce.JobACL"

   # Known ACL type names, each mapped to a dotted string (presumably the
   # matching Hadoop configuration key -- confirm).
   TYPES = dict(
      VIEW_JOB="mapreduce.job.acl-view-job",
      MODIFY_JOB="mapreduce.job.acl-modify-job",
   )

   @classmethod
   def readFrom( cls, sock ):
      """Not supported: always raises NotImplementedError."""
      raise NotImplementedError()

   def serialize( self ):
      """Not supported: always raises NotImplementedError."""
      raise NotImplementedError()


class AccessControlList( Writable ):
   """Holds the ACL string of a Hadoop AccessControlList (read-only)."""
   HADOOP_CLASS_NAME = "org.apache.hadoop.security.authorize.AccessControlList"
   __slots__ = [ "aclString" ]

   def __init__( self, aclString ):
      super().__init__()
      self.aclString = aclString

   def serialize( self ):
      """Not supported: always raises NotImplementedError."""
      raise NotImplementedError()

   @classmethod
   def readFrom( cls, sock ):
      """Builds an AccessControlList from one text field read off sock."""
      return AccessControlList( sock.readText() )


class JobStatus( Writable ):
   """Status of one job as de-serialized from the JobTracker.

   When built by readFrom(), runState is one of the names in RUN_STATES and
   priority is one of the camel-case values of PRIORITIES.
   """
   HADOOP_CLASS_NAME = "org.apache.hadoop.mapred.JobStatus"

   # From JobPriority.java: wire name -> camel-case name.
   PRIORITIES = dict(
      VERY_HIGH="veryHigh",
      HIGH="high",
      NORMAL="normal",
      LOW="low",
      VERY_LOW="veryLow",
   )

   # Numeric run state received on the wire -> name.
   RUN_STATES = {
      0: "unknown",
      1: "running",
      2: "succeeded",
      3: "failed",
      4: "prep",
      5: "killed",
   }

   __slots__ = [ 'jobid', 'setupProgress', 'mapProgress', 'reduceProgress',
                 'cleanupProgress', 'runState', 'startTime', 'user', 'priority',
                 'schedulingInfo', 'acls', 'failureInfo' ]

   def __init__( self, jobid, setupProgress, mapProgress, reduceProgress,
                 cleanupProgress, runState, startTime, user, priority,
                 schedulingInfo, acls, failureInfo ):
      super().__init__()
      assert isinstance( jobid, JobId ), f"Invalid Job ID: {jobid!r}"
      self.jobid = jobid
      # Progress of each phase, as read by readFloat().
      self.setupProgress = setupProgress
      self.mapProgress = mapProgress
      self.reduceProgress = reduceProgress
      self.cleanupProgress = cleanupProgress
      self.runState = runState
      self.startTime = startTime
      self.user = user
      self.priority = priority
      self.schedulingInfo = schedulingInfo
      self.acls = acls
      self.failureInfo = failureInfo

   def serialize( self ):
      """Not supported: always raises NotImplementedError."""
      raise NotImplementedError()

   @classmethod
   def readFrom( cls, sock ):
      """Reads one JobStatus from sock, validating enumerated fields."""
      jobid = JobId.readFrom( sock )
      # Four consecutive floats: setup, map, reduce and cleanup progress.
      setupProgress, mapProgress, reduceProgress, cleanupProgress = (
         sock.readFloat() for _ in range( 4 ) )

      stateCode = sock.readInt()
      ensure( stateCode in JobStatus.RUN_STATES,
              "Invalid job state: %d" % stateCode )
      runState = JobStatus.RUN_STATES.get( stateCode )

      startTime = sock.readLong()
      user = sock.readText()

      wirePriority = sock.readText()
      ensure( wirePriority in JobStatus.PRIORITIES,
              "Invalid job priority: %r" % wirePriority )
      # Translate the wire name into its camel-case equivalent.
      priority = JobStatus.PRIORITIES[ wirePriority ]
      schedulingInfo = sock.readText()

      # The job's ACLs.  Anything but an empty set trips the assert below,
      # so the parsing loop only runs when assertions are disabled.
      aclCount = sock.readInt()
      ensure( aclCount < 512, "Too many ACLs: %d" % aclCount )
      assert not aclCount, "ACLs parsing unsupported but got %r" % aclCount
      acls = {}
      for _ in range( aclCount ):
         kind = sock.readText()
         ensure( kind in JobACL.TYPES, "Invalid ACL type: %r" % kind )
         entry = AccessControlList.readFrom( sock )
         ensure( kind not in acls, "Already have ACLs of type %s: %s"
                 % ( kind, acls.get( kind ) ) )
         acls[ kind ] = entry

      failureInfo = sock.readText()
      return JobStatus( jobid, setupProgress, mapProgress, reduceProgress,
                        cleanupProgress, runState, startTime, user, priority,
                        schedulingInfo, acls, failureInfo )


class JobProfile( Writable ):
   """Descriptive attributes of a job: its ID plus five text fields."""
   HADOOP_CLASS_NAME = "org.apache.hadoop.mapred.JobProfile"
   __slots__ = [ 'jobid', 'jobFile', 'url', 'user', 'name', 'queueName' ]

   def __init__( self, jobid, jobFile, url, user, name, queueName ):
      super().__init__()
      assert isinstance( jobid, JobId ), f"Invalid Job ID: {jobid!r}"
      self.jobid = jobid
      self.jobFile = jobFile
      self.url = url
      self.user = user
      self.name = name
      self.queueName = queueName

   def serialize( self ):
      """Not supported: always raises NotImplementedError."""
      raise NotImplementedError()

   @classmethod
   def readFrom( cls, sock ):
      """Reads a JobId followed by five text fields from sock."""
      jobid = JobId.readFrom( sock )
      # Wire order: job file, URL, user, job name, queue name.
      jobFile, url, user, name, queueName = (
         sock.readText() for _ in range( 5 ) )
      return JobProfile( jobid, jobFile, url, user, name, queueName )

class Counter:
   """A generic MapReduce counter: a name, a value and a description."""

   def __init__( self, name, value, description=None ):
      self.name = name
      self.value = value
      # A missing (or otherwise falsy) description falls back to the name.
      self.description = description if description else name

   def __eq__( self, other ):
      # Only exact Counter instances can compare equal (subclasses cannot).
      if other.__class__ != Counter:
         return False
      return ( self.name == other.name
               and self.value == other.value
               and self.description == other.description )

   def __ne__( self, other ):
      return not ( self == other )

   def __hash__( self ):
      key = ( self.name, self.value, self.description )
      return hash( key )

   def __repr__( self ):
      fields = ( self.name, self.value, self.description )
      return "Counter(name=%r, value=%r, description=%r)" % fields

   def __str__( self ):
      fields = ( self.description, self.value )
      return "%s: %d" % fields



class Counters( Writable ):
   """De-serialized MapReduce counters.

   Only a handful of counters are retained, each as an attribute that stays
   0 when the counter is absent: dataLocalMaps, hdfsBytesRead,
   hdfsBytesWritten, mapOutputBytes and reduceShuffleBytes.
   """
   HADOOP_CLASS_NAME = "org.apache.hadoop.mapred.Counters"

   # Maximum number of counter groups we allow.
   MAX_GROUPS = 1024

   def __init__( self, groups ):
      """Constructs a Counters instance from groups of Counter objects.

      Args:
        groups: A dict mapping a counter group name (string) to a dict of
          counters in which each entry maps a counter name (string) to a
          Counter instance.
      """
      super().__init__()
      assert isinstance( groups, dict ), f"Invalid argument {groups!r}"

      def counter( group, name ):
         """Returns the value of counter `name` in `group`, or 0 if absent."""
         c = group.get( name )
         return c.value if c else 0

      # For select counters of interest, promote them to first-class
      # attributes (assuming they are present).
      self.dataLocalMaps = 0
      self.hdfsBytesRead = 0
      self.hdfsBytesWritten = 0
      self.mapOutputBytes = 0
      self.reduceShuffleBytes = 0

      # HDFS I/O counters
      hdfs = groups.get( "HDFS" )  # CDH4
      if hdfs:
         self.hdfsBytesRead = counter( hdfs, "BYTES_READ" )
         self.hdfsBytesWritten = counter( hdfs, "BYTES_WRITTEN" )
      else:
         fsCounters = groups.get( "FileSystemCounters" )  # Hadoop 1.x
         if fsCounters:
            self.hdfsBytesRead = counter( fsCounters, "HDFS_BYTES_READ" )
            self.hdfsBytesWritten = counter( fsCounters, "HDFS_BYTES_WRITTEN" )

      # MapReduce task-level counters
      task = groups.get( "org.apache.hadoop.mapred.Task$Counter" )
      if task:
         self.reduceShuffleBytes = counter( task, "REDUCE_SHUFFLE_BYTES" )
         self.mapOutputBytes = counter( task, "MAP_OUTPUT_BYTES" )

      # MapReduce job-level counters
      job = groups.get( "org.apache.hadoop.mapred.JobInProgress$Counter" )
      if job:
         self.dataLocalMaps = counter( job, "DATA_LOCAL_MAPS" )

   class Framework( Writable ):
      """Reader for a framework counter group, whose counters are identified
      on the wire by their index in JOB_COUNTERS / TASK_COUNTERS."""
      HADOOP_CLASS_NAME = ( "org.apache.hadoop.mapreduce.counters."
                            "FrameworkCounterGroup" )

      JOB_COUNTERS = dict( enumerate( (  # From JobCounter.properties:
         ( "NUM_FAILED_MAPS", "Failed map tasks" ),
         ( "NUM_FAILED_REDUCES", "Failed reduce tasks" ),
         ( "TOTAL_LAUNCHED_MAPS", "Launched map tasks" ),
         ( "TOTAL_LAUNCHED_REDUCES", "Launched reduce tasks" ),
         ( "OTHER_LOCAL_MAPS", "Other local map tasks" ),
         ( "DATA_LOCAL_MAPS", "Data-local map tasks" ),
         ( "RACK_LOCAL_MAPS", "Rack-local map tasks" ),
         ( "SLOTS_MILLIS_MAPS", "Total time spent by all maps in occupied"
           " slots (ms)" ),
         ( "SLOTS_MILLIS_REDUCES", "Total time spent by all reduces in"
           " occupied slots (ms)" ),
         ( "FALLOW_SLOTS_MILLIS_MAPS", "Total time spent by all maps"
           " waiting after reserving slots (ms)" ),
         ( "FALLOW_SLOTS_MILLIS_REDUCES", "Total time spent by all reduces"
           " waiting after reserving slots (ms)" ),
      ) ) )

      TASK_COUNTERS = dict( enumerate( (  # From TaskCounter.properties:
         ( "MAP_INPUT_RECORDS", "Map input records" ),
         ( "MAP_OUTPUT_RECORDS", "Map output records" ),
         ( "MAP_SKIPPED_RECORDS", "Map skipped records" ),
         ( "MAP_OUTPUT_BYTES", "Map output bytes" ),
         ( "MAP_OUTPUT_MATERIALIZED_BYTES", "Map output materialized bytes" ),
         ( "SPLIT_RAW_BYTES", "Input split bytes" ),
         ( "COMBINE_INPUT_RECORDS", "Combine input records" ),
         ( "COMBINE_OUTPUT_RECORDS", "Combine output records" ),
         ( "REDUCE_INPUT_GROUPS", "Reduce input groups" ),
         ( "REDUCE_SHUFFLE_BYTES", "Reduce shuffle bytes" ),
         ( "REDUCE_INPUT_RECORDS", "Reduce input records" ),
         ( "REDUCE_OUTPUT_RECORDS", "Reduce output records" ),
         ( "REDUCE_SKIPPED_GROUPS", "Reduce skipped groups" ),
         ( "REDUCE_SKIPPED_RECORDS", "Reduce skipped records" ),
         ( "SPILLED_RECORDS", "Spilled Records" ),
         ( "SHUFFLED_MAPS", "Shuffled Maps " ),
         ( "FAILED_SHUFFLE", "Failed Shuffles" ),
         ( "MERGED_MAP_OUTPUTS", "Merged Map outputs" ),
         ( "GC_TIME_MILLIS", "GC time elapsed (ms)" ),
         ( "CPU_MILLISECONDS", "CPU time spent (ms)" ),
         ( "PHYSICAL_MEMORY_BYTES", "Physical memory (bytes) snapshot" ),
         ( "VIRTUAL_MEMORY_BYTES", "Virtual memory (bytes) snapshot" ),
         ( "COMMITTED_HEAP_BYTES", "Total committed heap usage (bytes)" ),
      ) ) )

      # Group ID received on the wire -> ( counter table, group name ).
      GROUPS = dict( enumerate( (
         ( TASK_COUNTERS, "org.apache.hadoop.mapred.Task$Counter" ),
         ( JOB_COUNTERS, "org.apache.hadoop.mapred.JobInProgress$Counter" ),
      ) ) )

      @classmethod
      def readFrom( cls, sock ):
         """Returns a dict of group name to dict-of-name-to-Counter."""
         groupId = sock.readVLong()
         group = cls.GROUPS.get( groupId )
         ensure( group is not None, "Unsupported counter group ID: %d" % groupId )
         group, category = group
         length = sock.readVLong()
         ensure( length < Counters.MAX_GROUPS,
                 "Too many counter groups: %d" % length )

         def readCounter():
            """Returns a pair ( counter-name, Counter-instance )."""
            enum = sock.readVLong()
            value = sock.readVLong()
            counter = group.get( enum )
            ensure( counter is not None, "Unsupported counter of group %d: %d"
                    " value %d" % ( groupId, enum, value ) )
            return counter[ 0 ], Counter( counter[ 0 ], value, counter[ 1 ] )

         return { category: dict( readCounter() for _ in range( length ) ) }

      def serialize( self ):
         raise NotImplementedError()

   class Filesystem( Writable ):
      """Reader for a file-system counter group: one set of COUNTERS per
      scheme listed in SCHEMES."""
      HADOOP_CLASS_NAME = ( "org.apache.hadoop.mapreduce.counters."
                            "FileSystemCounterGroup" )

      SCHEMES = frozenset( ( b"FILE", b"HDFS" ) )
      COUNTERS = dict( enumerate( (  # From FileSystemCounter.properties:
         ( "BYTES_READ", "Number of bytes read" ),
         ( "BYTES_WRITTEN", "Number of bytes written" ),
         ( "READ_OPS", "Number of read operations" ),
         ( "LARGE_READ_OPS", "Number of large read operations" ),
         ( "WRITE_OPS", "Number of write operations" ),
      ) ) )

      @classmethod
      def readFrom( cls, sock ):
         """Returns a dict of group name to dict-of-name-to-Counter.

         The group names are the scheme names as str (e.g. "HDFS").
         """
         # See FileSystemCounterGroup.readFields()
         numSchemes = sock.readVLong()
         ensure( numSchemes < Counters.MAX_GROUPS,
                 "Too many schemes: %d" % numSchemes )

         def readCounter():
            """Returns a pair ( counter-name, Counter-instance )."""
            key = sock.readVLong()
            value = sock.readVLong()
            counter = cls.COUNTERS.get( key )
            ensure( counter is not None,
                    "Unsupported counter %d with value %d" % ( key, value ) )
            return counter[ 0 ], Counter( counter[ 0 ], value, counter[ 1 ] )

         counters = {}
         for _ in range( numSchemes ):
            scheme = sock.readString()
            ensure( scheme in cls.SCHEMES, "Unsupported scheme: %r" % scheme )
            # The scheme is matched against SCHEMES as bytes, but group names
            # are looked up as str everywhere else (see Counters.__init__,
            # which asks for "HDFS").  Keying the result by the raw bytes
            # meant that lookup could never succeed, so decode it here.
            groupName = scheme.decode()
            numCounters = sock.readVLong()
            ensure( numCounters < MAX_ARRAY_SIZE,
                    "Too many counters: %d" % numCounters )
            ensure( groupName not in counters,
                    "Scheme %r already seen" % scheme )
            counters[ groupName ] = dict( readCounter()
                                          for _ in range( numCounters ) )

         return counters

      def serialize( self ):
         raise NotImplementedError()

   # enum GroupType under AbstractCounters
   GROUP_MAP = {
      0x00: Framework,
      0x01: Filesystem
   }

   @classmethod
   def readFrom( cls, sock ):
      """Reads all counter groups from sock and returns a Counters instance.

      With a ProtoBufWithWritableEngine the stream starts with a version and
      a list of typed groups (see GROUP_MAP) ahead of the generic groups;
      otherwise it holds generic groups only.
      """
      groups = {}
      if isinstance( sock.engine(), ProtoBufWithWritableEngine ):
         version = sock.readVLong()
         ensure( version == 1, "Unhandled CounterGroupFactory version %d" % version )
         numFGroups = sock.readVLong()
         ensure( numFGroups < Counters.MAX_GROUPS,
                 "Too many FS counter groups: %d" % numFGroups )
         for _ in range( numFGroups ):
            groupType = sock.readVLong()
            groupCls = Counters.GROUP_MAP.get( groupType )
            ensure( groupCls is not None,
                    "Invalid group type code: %d" % groupType )
            group = groupCls.readFrom( sock )
            groups.update( group )
         numGroups = sock.readVLong()
      else:
         numGroups = sock.readInt()

      def readGenericCounter():
         """Returns a pair ( counter-name, Counter-instance )."""
         # Reads a org.apache.hadoop.mapreduce.counters.GenericCounter
         counterName = sock.readText()
         # A boolean says whether a separate display name follows.
         displayName = sock.readText() if sock.readBool() else counterName
         value = sock.readVLong()
         return counterName, Counter( counterName, value, displayName )

      ensure( numGroups < Counters.MAX_GROUPS,
              "Too many counter groups: %d" % numGroups )
      for _ in range( numGroups ):
         name = sock.readText()         # Written by AbstractCounters
         _displayName = sock.readText()  # Written by AbstractCounterGroup
         size = sock.readVLong()
         # Bound the count like every other length read off the wire, so a
         # bogus value cannot send us into a near-endless read loop.
         ensure( size < MAX_ARRAY_SIZE, "Too many counters: %d" % size )
         ensure( name not in groups,
                 f"Already have {name!r}: {groups.get( name )!r}" )
         groups[ name ] = dict( readGenericCounter() for _ in range( size ) )
      return Counters( groups )

   def serialize( self ):
      raise NotImplementedError()


class TaskId( Writable ):
   """Identifies a task: the owning JobId, a task number and a map flag."""
   HADOOP_CLASS_NAME = "org.apache.hadoop.mapred.TaskID"
   __slots__ = [ 'jobid', 'taskid', 'ismap' ]

   def __init__( self, jobid, taskid, ismap ):
      super().__init__()
      assert isinstance( jobid, JobId ), f"Invalid Job ID: {jobid!r}"
      self.jobid = jobid
      self.taskid = taskid
      self.ismap = ismap

   def serialize( self ):
      """Not supported: always raises NotImplementedError."""
      raise NotImplementedError()

   @classmethod
   def readFrom( cls, sock ):
      """Reads the task number, then the JobId, then the map flag."""
      number = sock.readInt()
      owner = JobId.readFrom( sock )
      mapFlag = sock.readBool()
      return TaskId( owner, number, mapFlag )


class TaskAttemptId( Writable ):
   """Identifies one attempt at a task: a TaskId plus an attempt number."""
   HADOOP_CLASS_NAME = "org.apache.hadoop.mapred.TaskAttemptID"
   __slots__ = [ 'taskid', 'attemptid' ]

   def __init__( self, taskid, attemptid ):
      super().__init__()
      assert isinstance( taskid, TaskId ), f"Invalid Task ID: {taskid!r}"
      self.taskid = taskid
      self.attemptid = attemptid

   def serialize( self ):
      """Not supported: always raises NotImplementedError."""
      raise NotImplementedError()

   @classmethod
   def readFrom( cls, sock ):
      """Reads the attempt number, then the TaskId it belongs to."""
      attempt = sock.readInt()
      task = TaskId.readFrom( sock )
      return TaskAttemptId( task, attempt )


class TaskReport( Writable ):
   """Report on one task: progress, timing, diagnostics, counters, status
   and the task attempts that accompany that status."""
   HADOOP_CLASS_NAME = "org.apache.hadoop.mapred.TaskReport"
   __slots__ = [ 'taskid', 'progress', 'state', 'start', 'end', 'diagnostics',
                 'counters', 'status', 'attempts' ]

   # Status name received on the wire -> lower-case name we expose.
   STATUSES = dict(
      PENDING="pending",
      RUNNING="running",
      COMPLETE="complete",
      KILLED="killed",
      FAILED="failed",
   )

   def __init__( self, taskid, progress, state, start, end, diagnostics,
                 counters, status, attempts ):
      super().__init__()
      assert isinstance( taskid, TaskId ), f"Invalid Task ID: {taskid!r}"
      self.taskid = taskid
      self.progress = progress
      self.state = state
      self.start = start
      self.end = end
      self.diagnostics = diagnostics
      self.counters = counters
      self.status = status
      self.attempts = attempts

   def key( self ):
      """Returns the TaskId this report is about."""
      return self.taskid

   def serialize( self ):
      """Not supported: always raises NotImplementedError."""
      raise NotImplementedError()

   @classmethod
   def readFrom( cls, sock ):
      """Reads one TaskReport from sock.

      diagnostics is None when the wire count is -1, else a list of
      readString() results.  attempts is a tuple of TaskAttemptId: several
      for a RUNNING task, exactly one for a COMPLETE task, none otherwise.
      """
      taskid = TaskId.readFrom( sock )
      progress = sock.readFloat()
      state = sock.readText()
      start = sock.readLong()
      end = sock.readLong()

      diagnostics = None
      diagCount = sock.readInt()
      if diagCount != -1:
         ensure( diagCount < MAX_ARRAY_SIZE, "Array too large: %d" % diagCount )
         diagnostics = [ sock.readString() for _ in range( diagCount ) ]

      counters = Counters.readFrom( sock )

      wireStatus = sock.readText()
      ensure( wireStatus in TaskReport.STATUSES,
              "Invalid task status: %r" % wireStatus )
      if wireStatus == "COMPLETE":
         # A completed task carries the single attempt that goes with it.
         attempts = ( TaskAttemptId.readFrom( sock ), )
      elif wireStatus == "RUNNING":
         attemptCount = sock.readVLong()
         ensure( attemptCount < MAX_ARRAY_SIZE,
                 "Too many task attempts: %d" % attemptCount )
         attempts = tuple( TaskAttemptId.readFrom( sock )
                           for _ in range( attemptCount ) )
      else:
         attempts = ()

      status = TaskReport.STATUSES[ wireStatus ]
      return TaskReport( taskid, progress, state, start, end, diagnostics,
                         counters, status, attempts )

class ClusterStatus( Writable ):
   """Cluster-wide figures de-serialized from the JobTracker: tracker counts
   and names, running tasks, slots, JobTracker heap sizes and status."""
   HADOOP_CLASS_NAME = "org.apache.hadoop.mapred.ClusterStatus"
   __slots__ = [ 'activeTrackers', 'trackers', 'blacklistedTrackers',
                 'blacklisted', 'excludedNodes', 'trackerExpiryInterval',
                 'mapTasksRunning', 'reduceTasksRunning', 'mapSlots', 'reduceSlots',
                 'jtHeapSize', 'jtMaxHeapSize', 'status' ]

   # Status name received on the wire -> lower-case name we expose.
   STATUSES = dict(
      INITIALIZING="initializing",
      RUNNING="running",
   )
   # Length of the shortest wire status name (used when sniffing ahead).
   MIN_STATUS_LEN = min( map( len, STATUSES ) )

   def __init__( self, activeTrackers, trackers, blacklistedTrackers,
                 blacklisted, excludedNodes, trackerExpiryInterval,
                 mapTasksRunning, reduceTasksRunning, mapSlots, reduceSlots,
                 jtHeapSize, jtMaxHeapSize, status ):
      super().__init__()
      self.activeTrackers = activeTrackers
      self.trackers = trackers
      self.blacklistedTrackers = blacklistedTrackers
      self.blacklisted = blacklisted
      self.excludedNodes = excludedNodes
      self.trackerExpiryInterval = trackerExpiryInterval
      self.mapTasksRunning = mapTasksRunning
      self.reduceTasksRunning = reduceTasksRunning
      self.mapSlots = mapSlots
      self.reduceSlots = reduceSlots
      self.jtHeapSize = jtHeapSize
      self.jtMaxHeapSize = jtMaxHeapSize
      self.status = status

   def serialize( self ):
      """Not supported: always raises NotImplementedError."""
      raise NotImplementedError()

   @classmethod
   def readFrom( cls, sock ):
      """Reads one ClusterStatus from sock.

      Gray-listed tracker data, when present on the wire, is consumed and
      discarded.
      """

      def readTrackerList():
         """Reads a tracker count followed by a list of tracker names.

         Returns the pair ( count, names ).  The list may legitimately be
         empty: it's OK to get no names if detailed=False on the RPC we sent.
         """
         expected = sock.readInt()
         num = sock.readInt()
         ensure( num < MAX_ARRAY_SIZE, "Array too large: %d" % num )
         names = [ sock.readText() for _ in range( num ) ]
         ensure( num == expected or not num,
                 "Expected %d but got %d: %r" % ( expected, num, names ) )
         return expected, names

      activeTrackers, trackers = readTrackerList()
      blacklistedTrackers, blacklisted = readTrackerList()

      # Hadoop flavors that all claim protocol version 28 disagree on
      # whether MAPREDUCE-1966 is patched in.  That patch inserts, right
      # here, a count of graylisted nodes and optionally their names, and
      # nothing tells us ahead of time whether it is on the wire.  So we
      # peek: without the patch, the fixed-size fields left to read take 44
      # bytes, then comes the status string (a 1-byte vint length plus the
      # text), which must be one of ClusterStatus.STATUSES.  We therefore
      # skip 45 bytes, grab as many bytes as the shortest status name has,
      # and check whether they are a prefix of a valid status.  If so there
      # is no graylist.  Either way the peeked bytes are pushed back.
      skipped = sock.recvall( 45 )
      prefix = sock.recvall( ClusterStatus.MIN_STATUS_LEN )
      hasGraylist = not any( known.encode().startswith( prefix )
                             for known in ClusterStatus.STATUSES )
      sock.unread( skipped + prefix )
      if hasGraylist:  # MAPREDUCE-1966 added gray listing.
         readTrackerList()  # Read only to advance the stream; not kept.

      excludedNodes = sock.readInt()
      trackerExpiryInterval = sock.readLong()
      mapTasksRunning = sock.readInt()
      reduceTasksRunning = sock.readInt()
      mapSlots = sock.readInt()
      reduceSlots = sock.readInt()
      jtHeapSize = sock.readLong()
      jtMaxHeapSize = sock.readLong()
      wireStatus = sock.readText()
      ensure( wireStatus in ClusterStatus.STATUSES,
              "Invalid cluster status: %r" % wireStatus )
      status = ClusterStatus.STATUSES[ wireStatus ]
      return ClusterStatus( activeTrackers, trackers,
                            blacklistedTrackers, blacklisted,
                            excludedNodes, trackerExpiryInterval,
                            mapTasksRunning, reduceTasksRunning, mapSlots,
                            reduceSlots, jtHeapSize, jtMaxHeapSize,
                            status )


class JobQueueInfo( Writable ):
   """A job queue described by three text fields: name, state and
   scheduling information."""
   HADOOP_CLASS_NAME = "org.apache.hadoop.mapred.JobQueueInfo"
   __slots__ = [ 'name', 'state', 'scheduling' ]

   def __init__( self, name, state, scheduling ):
      super().__init__()
      self.name = name
      self.state = state
      self.scheduling = scheduling

   def serialize( self ):
      """Not supported: always raises NotImplementedError."""
      raise NotImplementedError()

   @classmethod
   def readFrom( cls, sock ):
      """Reads the name, state and scheduling text fields, in that order."""
      # Arguments are evaluated left to right, which matches the wire order.
      return JobQueueInfo( sock.readText(), sock.readText(), sock.readText() )


class TaskCompletionEvent( Writable ):
   """One task completion event, as returned by getTaskCompletionEvents."""
   HADOOP_CLASS_NAME = "org.apache.hadoop.mapred.TaskCompletionEvent"
   __slots__ = [ 'taskid', 'idWithinJob', 'isMap', 'status', 'taskTrackerHttp',
                 'taskRunTime', 'eventId' ]

   # The only status names accepted off the wire.
   STATUSES = frozenset( [
      "FAILED",
      "KILLED",
      "SUCCEEDED",
      "OBSOLETE",
      "TIPFAILED",
   ] )

   def __init__( self, taskid, idWithinJob, isMap, status, taskTrackerHttp,
                 taskRunTime, eventId ):
      super().__init__()
      # readFrom() supplies a TaskAttemptId here, despite the name.
      self.taskid = taskid
      self.idWithinJob = idWithinJob
      self.isMap = isMap
      self.status = status
      self.taskTrackerHttp = taskTrackerHttp
      self.taskRunTime = taskRunTime
      self.eventId = eventId

   def serialize( self ):
      """Not supported: always raises NotImplementedError."""
      raise NotImplementedError()

   @classmethod
   def readFrom( cls, sock ):
      """Reads one event from sock, validating its status name."""
      attempt = TaskAttemptId.readFrom( sock )
      idWithinJob = sock.readVLong()
      isMap = sock.readBool()
      status = sock.readText()
      ensure( status in TaskCompletionEvent.STATUSES, "Invalid status: %r" % status )
      # This field is read with readString(), whose result needs decoding.
      trackerUrl = sock.readString().decode()
      runTime = sock.readVLong()
      eventId = sock.readVLong()
      return TaskCompletionEvent( attempt, idWithinJob, isMap, status,
                                  trackerUrl, runTime, eventId )

