#!/bin/bash
# Abort the script as soon as any unguarded command fails.
set -e
# Open this script file read-only on fd 9 and take an exclusive flock on it.
# flock blocks until the lock is granted, so concurrent invocations of this
# script run one at a time; the lock is held for as long as fd 9 stays open.
exec 9<"$0"
flock -x 9

# Directory holding the temporary fsck/mount log files created by the
# functions below.
LOG_DIR="/run/initblockdev"
mkdir -p "$LOG_DIR"

# Send a single message to syslog through logger(1), using priority
# local4.info and the tag "initblockdev".
#   $1 - message text
log() {
    logger -p local4.info -t initblockdev "${1}"
}

# Forward every line of a file to syslog through log().
#   $1 - path of the file to forward
# Lines are passed on verbatim: IFS is cleared so leading/trailing blanks
# (e.g. fsck indentation) survive, and a final line that lacks a trailing
# newline is still logged instead of being silently dropped.
log_file() {
    local file="$1"
    local line

    # read returns non-zero on a last line without a newline but still fills
    # $line, hence the extra non-empty check.
    while IFS= read -r line || [ -n "$line" ]; do
        log "$line"
    done < "$file"
}

# Print the numeric code of the first character of $1 as lower-case hex
# (no "0x" prefix). Any printf diagnostics are discarded.
#   $1 - string whose first character is converted
ord() {
    # A leading single quote makes printf emit the code of the character
    # that follows it; LC_CTYPE=C selects the C locale for the conversion.
    LC_CTYPE=C printf '%x' "'${1}" 2>/dev/null
}

# Guess the filesystem type of a block device by probing well-known magic
# values with dd, and print it on stdout.
#   $1 - path of the device (or image file) to probe
# Prints exactly one of "vfat", "ext4" or "iso9660", or nothing when none of
# the signatures is found. The probes are tried in that order and the first
# hit wins.
get_fstype() {
    local device="$1"

    # FAT keeps an ASCII type string in the boot sector: offset 82 for FAT32,
    # offset 54 for FAT16.
    if [ "$(dd if="${device}" bs=1 skip=82 count=5 2>/dev/null)" = "FAT32" ] \
         || [ "$(dd if="${device}" bs=1 skip=54 count=5 2>/dev/null)" = "FAT16" ]; then
        echo "vfat"
        return
    fi

    # Look for ext4's magic number (0xef53). The superblock is located at
    # 0x400 and the magic number is at offset 0x38 within it, stored little
    # endian: byte 0x438 holds 0x53 and byte 0x439 holds 0xef.
    # "local var=$(...)" is used on purpose so a failing dd is not fatal.
    local ext4_raw1="$(dd if="${device}" skip=$((0x439)) bs=1 count=1 2>/dev/null)"
    local ext4_raw2="$(dd if="${device}" skip=$((0x438)) bs=1 count=1 2>/dev/null)"
    local ext4_mag1="$(ord "${ext4_raw1}")"
    local ext4_mag2="$(ord "${ext4_raw2}")"

    if [ "0x${ext4_mag1}" = "0xef" ] && [ "0x${ext4_mag2}" = "0x53" ]; then
        echo "ext4"
        # Stop here: without this the ISO probe below could append a second
        # type to the output.
        return
    fi

    # ISO9660 volume descriptors carry the identifier "CD001" one byte into
    # the sectors at 0x8000, 0x8800 and 0x9000.
    local iso_magic1="$(dd if="${device}" skip=$((0x8001)) bs=1 count=5 2>/dev/null)"
    local iso_magic2="$(dd if="${device}" skip=$((0x8801)) bs=1 count=5 2>/dev/null)"
    local iso_magic3="$(dd if="${device}" skip=$((0x9001)) bs=1 count=5 2>/dev/null)"

    if [ "${iso_magic1}" = "CD001" ] || [ "${iso_magic2}" = "CD001" ] || \
       [ "${iso_magic3}" = "CD001" ]; then
        echo "iso9660"
        return
    fi
}

# Check, and if needed repair, a partition with e2fsck.
#   $1 - disk name without the /dev/ prefix
#   $2 - partition suffix appended to the disk name (may be empty)
#   $3 - intended mount point; only used in the log message
# A first pass runs "e2fsck -p". Exit codes 0 (no errors), 1 (errors
# corrected) and 8 (operational error, e.g. already mounted) end the check
# right away. If bit 4 is set in the exit code a second pass runs
# "e2fsck -y"; if bit 4 is still set afterwards the collected output is
# forwarded to syslog. Finally, if bit 2 is set in the last exit code,
# reboot is invoked. The temporary log file under $LOG_DIR is always removed.
run_e2fsck() {
    local dev="$1"
    local part="$2"
    local mount="$3"
    local node="/dev/${dev}${part}"
    local fsck_log="$LOG_DIR/e2fsck-${dev}${part}.log"
    local rc=0

    log "Running e2fsck on: ${mount}"

    echo "E2FSCK(${node}) attempt #1" >"${fsck_log}"
    /sbin/e2fsck -p "${node}" >>"${fsck_log}" 2>&1 || rc="$?"

    case "${rc}" in
        0 | 1 | 8)
            # Nothing left to do for these results.
            rm -f "${fsck_log}"
            return
            ;;
    esac

    log "WARNING: e2fsck -p ${node} return code: ${rc}"
    echo "e2fsck -p ${node} return code: ${rc}" >> "${fsck_log}"

    if (( rc & 4 )); then
        # Preen mode left errors behind: retry, answering yes to everything.
        rc=0
        echo "E2FSCK(${node}) attempt #2" >>"${fsck_log}"
        /sbin/e2fsck -y "${node}" >>"${fsck_log}" 2>&1 || rc="$?"

        if (( rc & 4 )); then
            echo "e2fsck -y ${node} return code: ${rc}" >> "${fsck_log}"
            log_file "${fsck_log}"
            log "ERROR: e2fsck couldn't fix ${node}"
        fi
    fi

    rm -f "${fsck_log}"

    if (( rc & 2 )); then
        log "Rebooting as instructed by e2fsck"
        reboot
    fi
}

# Run dosfsck on a FAT partition and then mount it.
#   $1 - disk name without the /dev/ prefix
#   $2 - partition suffix appended to the disk name (may be empty)
#   $3 - mount point
# The whole script exits with status 1 when a second dosfsck pass returns
# anything other than 0 or 1. A failing mount is only logged. Both temporary
# log files under $LOG_DIR are removed before returning.
mount_vfat() {
    local dev="$1"
    local part="$2"
    local mount="$3"
    local node="/dev/${dev}${part}"
    local fsck_log="$LOG_DIR/dosfsck-${dev}${part}.log"
    local mount_log="$LOG_DIR/mount-${dev}${part}.log"
    local opts='flush,dirsync,noatime,uid=0,gid=eosadmin,umask=0007'
    local rc=0

    log "Running dosfsck on: ${mount}"

    echo "DOSFSCK(${node}) attempt #1" >> "${fsck_log}"
    /sbin/dosfsck -a -w "${node}" >>"${fsck_log}" 2>&1 || rc="$?"

    # A non-zero result from dosfsck does not tell us whether the filesystem
    # is clean afterwards, so run it a second time to find out.
    if [ "${rc}" -ne 0 ]; then
        log "dosfsck returned ${rc} on ${node} ${mount}, verifying the partition again"
        echo "DOSFSCK(${node}) attempt #2" >> "${fsck_log}"
        rc=0
        /sbin/dosfsck -a -w "${node}" >>"${fsck_log}" 2>&1 || rc="$?"

        case "${rc}" in
            0 | 1)
                ;;
            *)
                log_file "${fsck_log}"
                log "dosfsck verification on ${node} ${mount} returned ${rc}, not mounting the partition"
                exit 1
                ;;
        esac
    fi

    # Only 0 or 1 can reach this point.
    case "${rc}" in
        1)
            log_file "${fsck_log}"
            log "dosfsck corrected errors on ${node} ${mount}"
            ;;
        0)
            log "dosfsck completed with no warnings or errors on ${node} ${mount}"
            ;;
    esac

    rc=0
    mount -t vfat -o "${opts}" "${node}" "${mount}" >"${mount_log}" 2>&1 || rc="$?"

    if [ "${rc}" -ne 0 ]; then
        log_file "${mount_log}"
        log "Fail to mount ${node} on ${mount}: $rc"
    fi

    rm -f "${fsck_log}" "${mount_log}"
}

# Mount an ext4 partition, running e2fsck first for the well-known mount
# points.
#   $1 - disk name without the /dev/ prefix
#   $2 - partition suffix appended to the disk name (may be empty)
#   $3 - mount point
#   $4 - command that is executed to decide whether /mnt/drive* may be
#        handled (the script passes "true" or "false")
# Behaviour by mount point:
#   /mnt/drive*            - exit the script with status 0 unless $4
#                            succeeds; otherwise fsck, mount, chmod/chgrp
#   /mnt/flash, /mnt/crash - add "grpid,commit=1" to the options, fsck and
#                            mount; /mnt/crash is then remounted read-only,
#                            /mnt/flash gets chmod/chgrp
#   anything else          - plain mount with "noatime", no fsck
# Mount failures are logged, not propagated.
mount_ext4() {
    local dev="$1"
    local part="$2"
    local mount="$3"
    local mountssd="$4"
    local node="/dev/${dev}${part}"
    local mount_log="$LOG_DIR/mount-${dev}${part}.log"
    local opts='noatime'
    local needs_fsck=false
    local rc=0

    case "${mount}" in
        "/mnt/drive"*)
            "${mountssd}" || exit 0
            needs_fsck=true
            ;;
        "/mnt/flash" | "/mnt/crash")
            opts="${opts},grpid,commit=1"
            needs_fsck=true
            ;;
    esac

    if [ "${needs_fsck}" != true ]; then
        mount -o "${opts}" "${node}" "${mount}" >"${mount_log}" 2>&1 || rc="$?"
        if [ "$rc" -ne 0 ]; then
            log_file "${mount_log}"
            log "Failed to mount ${dev}${part} to ${mount}"
        fi
        return
    fi

    run_e2fsck "${dev}" "${part}" "${mount}"
    mount -o "${opts}" "${node}" "${mount}" >"${mount_log}" 2>&1 || rc="$?"
    if [ "$rc" -ne 0 ]; then
        log_file "${mount_log}"
        log "Failed to mount ${dev}${part} to ${mount}: $rc"
    elif [ "${mount}" = "/mnt/crash" ]; then
        mount -o remount,ro /mnt/crash
    else
        chmod 770 "${mount}"
        chgrp eosadmin "${mount}"
    fi
}

# Print the device id of a disk: the resolved target of
# /sys/block/<dev>/device with the "/sys/devices/" prefix removed.
#   $1 - disk name without the /dev/ prefix
# Prints nothing at all when $1 is empty, and an empty line when the sysfs
# entry does not exist.
get_devid() {
    local dev="$1"
    local sysfs_entry="/sys/block/${dev}/device"
    local devid=""

    if [ -z "$dev" ]; then
        return
    fi

    if [ -e "${sysfs_entry}" ]; then
        devid=$(realpath "${sysfs_entry}" | sed "s#/sys/devices/\(.*\)#\1#")
    fi

    echo "$devid"
}

# Succeed when a drive's devid matches the current boot drive, i.e. the
# regular expression stored on the first line of /etc/multidrive_src.
#   $1 - devid to test
# The match is done with expr(1), so it is anchored at the start of the devid.
# NOTE: drive_re is deliberately not declared local; the name is shared with
# multidrive_is_boot_drive and keeps its previous value if the read fails.
multidrive_is_curr_drive() {
   local candidate="$1"

   read -r drive_re < /etc/multidrive_src
   expr "${candidate}" : "${drive_re}" >/dev/null
}

# Succeed when a drive's devid matches ANY boot drive known to the system,
# not only the one currently being attempted. /etc/multidrive holds one
# regular expression per line; each is tried with expr(1) until one matches.
#   $1 - devid to test
# NOTE: drive_re is deliberately not declared local; the name is shared with
# multidrive_is_curr_drive.
multidrive_is_boot_drive() {
   local devid="$1"

   # Without /etc/multidrive no boot_drive[0-9]* kernel parameters were
   # passed to EOS. Report "not a boot drive" so the drive is still mounted.
   if [ ! -f /etc/multidrive ]; then
      return 1
   fi

   while read -r drive_re; do
      if expr "$devid" : "$drive_re" >/dev/null; then
         return 0
      fi
   done < /etc/multidrive

   return 1
}

# Succeed when the given disk is a boot drive from which boot is NOT
# currently being attempted.
#   $1 - disk name without the /dev/ prefix
# Fails when the disk has no devid, is not a boot drive at all, or is the
# current boot drive.
multidrive_is_inactive_drive() {
   local dev="$1"
   local id

   id="$(get_devid "$dev")"

   [ -n "$id" ] || return
   multidrive_is_boot_drive "$id" || return
   ! multidrive_is_curr_drive "$id"
}

# Partitions in sxverity format must never be auto-mounted. They are
# recognised by the magic string "sxverity" in their first 8 bytes.
#   $1 - path of the device to inspect
# Prints "no_mount" for an sxverity partition and nothing otherwise.
check_verity_partition() {
   local device_path="$1"
   local magic

   magic="$(dd if="${device_path}" bs=1 count=8 2>/dev/null)"
   [ "${magic}" = "sxverity" ] || return 0

   echo "no_mount"
}

# Derive a mount name for a device-mapper device from its dm name.
#   $1 - block device name as found under /sys/block
# Prints nothing when the device has no dm/name entry in sysfs. Otherwise the
# part of the dm name following "vg<digits>-" is extracted; it is printed
# when it is "flash", "crash" or "drive", and "no_mount" is printed for
# anything else.
dm_mount_name() {
    local device="$1"
    local name_file="/sys/block/$device/dm/name"
    local lv_name

    [ -f "${name_file}" ] || return 0

    lv_name=$(sed -ne "s/vg[0-9]\+-\(.*\)/\\1/p" "${name_file}")

    # Only the flash, crash and drive logical volumes are ever mounted.
    case "${lv_name}" in
        flash | crash | drive)
            echo "${lv_name}"
            ;;
        *)
            echo "no_mount"
            ;;
    esac
}

# Derive a mount name from a partition's disk label.
#   $1 - device path, e.g. /dev/sda1
#   $2 - disk label; when empty the label is looked up with blkid
# Prints one of:
#   "no_mount"       - redundant, aboot or extended partition, or a label
#                      whose suffix is not a safe directory name
#   ""               - the partition has no label
#   "non_eos_label"  - there is a label but it lacks a usable eos prefix
#   <name>           - lower-cased label suffix to be mounted as /mnt/<name>
label_mount_name() {
    local device_path="$1"
    local label="$2"
    local mount

    # If we're passed a label (this comes from udev), just use that.  If we're not,
    # try and use blkid to get the label from the device path
    if [ -z "${label}" ]; then
       label=$(/sbin/blkid | sed -nr "s#^${device_path}:.*[[:space:]]+LABEL=\"([^ ]+)\".*#\1#p")
    fi

    # First, check if the label indicates a redundant partition from the
    # redundant boot feature.  Redundant partitions are not auto-mounted
    if is_redundant_partition "${label}"; then
       echo "no_mount"
       return
    fi

    # Also check if the label indicates a partition storing Aboot image.
    # The aboot partitions are not auto-mounted either.
    if is_aboot_partition "${label}"; then
       echo "no_mount"
       return
    fi

    # Skip mounting extended partitions, they are only used to have more
    # than 4 partitions on MBR.
    if is_extended_partition "${device_path}"; then
       echo "no_mount"
       return
    fi

    if [ -z "$label" ]; then
        echo ""
        return
    fi

    # Labels must start with 'eos' and be followed by '-' or '_'. Everything after
    # this prefix is going to be used as mount point. For example a partition with
    # the label 'EOS-TEST' would be mounted to /mnt/test.
    # This label based detection is case insensitive.
    mount=$(expr "${label}" : '[Ee][Oo][Ss][-_]\(.*\)' || :)

    # Convert mount point to lower case. We always use lower case mount
    # points. printf is used so that values such as "-n" are not swallowed
    # as echo options.
    mount=$(printf '%s\n' "${mount}" | tr 'A-Z' 'a-z')
    if [ -z "$mount" ]; then
        echo "non_eos_label"
        return
    fi

    # The name is appended to /mnt/ by the caller and the label comes from
    # the medium itself, so refuse anything that would leave /mnt
    # (e.g. "EOS-.." or "EOS-../etc").
    case "${mount}" in
        */* | . | ..)
            echo "no_mount"
            return
            ;;
    esac

    echo "$mount"
}

# Look up the mount name configured for a device in /etc/blockdev.
#   $1 - disk name without the /dev/ prefix
#   $2 - partition suffix (may be empty)
# Each line of /etc/blockdev is read as "<pattern> <name>". Lines whose
# pattern starts with "#" are skipped. The pattern is tried, with
# "expr match", against "<devid>/<part>" (only when a devid is known),
# "block/<dev>/<dev><part>" and "@<dev>", in that order. The name of the
# first matching line is printed; an empty line is printed when none match.
blockdev_mount_name() {
    local dev="$1"
    local part="$2"
    local devid=$( get_devid "$dev" )
    local pattern name
    local chosen

    while read -r pattern name; do
        if expr match "$pattern" "#" >/dev/null; then
            continue
        fi

        if [ -n "${devid}" ] && expr match "${devid}/${part}" "$pattern" >/dev/null; then
            chosen=$name
            break
        fi

        if expr match "block/${dev}/${dev}${part}" "$pattern" >/dev/null; then
            chosen=$name
            break
        fi

        if expr match "@${dev}" "$pattern" >/dev/null; then
            chosen=$name
            break
        fi
    done </etc/blockdev

    echo "$chosen"
}

# Decide under which name (below /mnt) a device should be mounted.
#   $1 - disk name without the /dev/ prefix
#   $2 - partition suffix (may be empty)
#   $3 - disk label, if already known (may be empty)
# Prints "no_mount" when the device must not be mounted, the mount name when
# one was found, or an empty line when nothing applies. Prints nothing when
# $1 is empty. The sources are consulted in this order: inactive boot drive,
# sxverity magic, device-mapper name, disk label, /etc/blockdev.
identify_mount_name() {
    local dev="$1"
    local part="$2"
    local label="$3"
    local device_path="/dev/${dev}${part}"
    local found=""
    local by_label=""

    if [ -z "$dev" ]; then
        return
    fi

    if multidrive_is_inactive_drive "$dev"; then
       echo "no_mount"
       return
    fi

    found=$(check_verity_partition "${device_path}")
    if [ -z "$found" ]; then
        found=$(dm_mount_name "${dev}${part}")
    fi
    if [ -n "$found" ]; then
        echo "$found"
        return
    fi

    by_label=$(label_mount_name "$device_path" "$label")
    case "$by_label" in
        "" | "non_eos_label")
            # No usable answer from the label; fall back to /etc/blockdev.
            ;;
        *)
            echo "$by_label"
            return
            ;;
    esac

    found=$(blockdev_mount_name "$dev" "$part")
    if [ "$by_label" = "non_eos_label" ]; then
        case "$found" in
            flash | drive | drive2)
                # For internal partitions with a label, the partition
                # should not be automatically mounted.
                echo "no_mount"
                return
                ;;
        esac
    fi

    echo "$found"
}

# Redundant partitions (from the redundant boot feature) are not
# auto-mounted. They are recognised by a disk label containing "rdnt"
# followed by "-" or "_", compared case-insensitively.
#   $1 - disk label
# Succeeds when the label marks a redundant partition.
is_redundant_partition() {
   local label="$1"

   # The label is passed through printf, quoted, so that glob characters or
   # blanks in it are matched literally instead of being expanded by the
   # shell.
   printf '%s\n' "${label}" | grep -i -q "[Rr][Dd][Nn][Tt][-_]"
}

# Aboot partitions are not auto-mounted. They are recognised by a disk label
# containing the word "aboot", compared case-insensitively.
#   $1 - disk label
# Succeeds when the label marks an Aboot partition.
is_aboot_partition() {
   local label="$1"

   # The label is passed through printf, quoted, so that glob characters or
   # blanks in it are matched literally instead of being expanded by the
   # shell.
   printf '%s\n' "${label}" | grep -i -q "[Aa][Bb][Oo][Oo][Tt]"
}

# Extended partitions might be present on MBR-based devices; they are not
# auto-mounted.
#   $1 - device path of the partition, e.g. /dev/sda2
# The parent disk is obtained by stripping a trailing "[p]<digits>" from the
# path. Succeeds when the disk exists and its "fdisk -l" listing has a line
# for the partition that ends in "Extended".
is_extended_partition() {
   local device_path="$1"
   local disk

   disk=$(echo "${device_path}" | sed 's/p\?[0-9]\+$//')

   [ -e "${disk}" ] || return 1

   /sbin/fdisk -l "${disk}" | grep -i "${device_path}" | grep -i -q -E "Extended[ \t]*$"
}

# Succeed when "fdisk -l" lists at least one partition of the given disk.
#   $1 - disk name without the /dev/ prefix
disk_is_partitioned() {
   local disk="/dev/$1"

   /sbin/fdisk -l "${disk}" | grep -qs "${disk}p\?[0-9]\+"
}

# initblockdev does not mount the ssd when called from udev or sysinit: the
# disk has to be fsck'ed before mounting, which can take a significant time,
# and delaying boot for that long is not acceptable. See BUG 195290.
# fsck and mounting of the ssd are done by the ssdinit service, which is
# what the --mountssd flag is for.
#
# Print the command-line synopsis on stdout and exit with status 1.
usage()
{
    printf '%s\n' "Usage: $0 <devname> <action> [ --mountssd ] [ <disklabel> ]"
    exit 1
}

# Command-line parsing: <devname> <action> [ --mountssd ] [ <disklabel> ]
#
# Defaults are assigned up front so that every variable is defined on every
# path. In particular "mountssd" is later executed as a command and must
# therefore always be "true" or "false", never empty.
mountssd=false
disklabel=""

case "$#" in
    2)
        ;;
    3)
        # The single optional argument is either the flag or the label.
        if [ "$3" = "--mountssd" ]; then
           mountssd=true
        else
           disklabel="$3"
        fi
        ;;
    4)
        # With two optional arguments the label is always the last one.
        if [ "$3" = "--mountssd" ]; then
           mountssd=true
        fi
        disklabel="$4"
        ;;
    *)
        usage
        ;;
esac

devname="$1"
action="$2"

# Action dispatch.
#   add    - work out where the device belongs, fsck/mount it and record the
#            result in "<mount>.conf"
#   remove - lazily unmount every mount of the device and delete its
#            "<mount>.conf" file and mount directory
# Any other action is silently ignored.
case "${action}" in
    add)
        # Automounting can be switched off from the kernel command line.
        grep -qs "AristaBlockdevAutomount=0" /proc/cmdline && exit 0

        # Split the device name into the disk ("dev") and the partition
        # suffix ("part"). The name is matched as a whole with a regular
        # expression rather than being fed through eval, so unexpected input
        # can never be executed as shell code; unsupported names leave "dev"
        # empty.
        # /dev/xvd[a-z] or /dev/nvme[0-9]n[0-9] below refers to block devices on AWS platform
        dev="" part=""
        devname_re='^([hs]d[a-z]+|xvd[a-z]+|nvme[0-9]n[0-9]+|mmcblk[0-9]+|dm-[0-9]+)(p?[0-9]*)$'
        if [[ "${devname}" =~ ${devname_re} ]]; then
            dev="${BASH_REMATCH[1]}"
            part="${BASH_REMATCH[2]}"
        fi
        [ -z "${dev}" ] && exit 0

        # If we are invoked with a whole disk, check if we have partitions first and
        # only attempt to mount it if no partitions are found
        [ -z "$part" ] && disk_is_partitioned "$dev" && exit 0

        # Don't attempt to mount LVM PVs
        lvm pvdisplay "/dev/${dev}${part}" >/dev/null 2>&1 && exit 0

        mount_name=$( identify_mount_name "$dev" "$part" "$disklabel" )
        [ "$mount_name" == "no_mount" ] && exit 0
        mount=/mnt/${mount_name}

        # No mount point selected
        [ -z "$mount_name" ] && exit 0

        fstype="$(get_fstype "/dev/${dev}${part}")"
        log "${dev}${part}, mount=${mount}, fstype=${fstype}"

        # On KVM, Aboot iso sometimes gets mounted on /mnt/flash.
        [ "$mount_name" == "flash" ] && [ "$fstype" == "iso9660" ] && exit 0

        # A device whose first six bytes are "070701" is not mounted at all;
        # only "<mount>-recover.conf" is written for it below.
        # NOTE(review): "070701" is the cpio "newc" magic, so this presumably
        # denotes a raw recovery archive - confirm.
        if [ "$(dd if="/dev/${dev}${part}" bs=1 count=6 2>/dev/null)" = "070701" ]; then
            mount="${mount}-recover"
        else
            # We are trying to remount a device block to the same mount point
            if grep -qs "/dev/${dev}${part} ${mount}" /proc/mounts; then
                exit 0
            fi

            # Check if a device is already mounted to our target mount point
            mountedDev=$(sed -n -r "s#^/dev/([^ ]+) ${mount} .*\$#\1#p" /proc/mounts)
            if [ -n "${mountedDev}" ]; then
                # If we don't have an fstype, most likely we couldn't have mounted
                # this device, but don't warn because it can generate too many msgs
                if [ -n "$fstype" ] && [ "${mountedDev}" != "${dev}${part}" ]; then
                   log "WARNING: cannot mount /dev/${dev}${part} on ${mount} because /dev/${mountedDev} is already mounted there"
                fi
                exit 0
            fi

            mkdir -p "${mount}"

            if "${mountssd}" && [[ "${mount}" != /mnt/drive* ]]; then
                echo "--mountssd is specified, but device is not drive (would be ${mount})"
                log "Skipping fsck checks because ${mount} is not /mnt/drive*"
                exit 0
            fi

            # Because of "set -e" this bare test aborts the script when the
            # mount point is on a different device than /mnt, i.e.
            # presumably when something is already mounted on it.
            [ "$(stat -c %D "${mount}")" = "$(stat -c %D /mnt)" ]
            if [ "x${fstype}" == xvfat ]; then
                log "mounting as vfat"
                mount_vfat "${dev}" "${part}" "${mount}"
            elif [ "x$fstype" == xext4 ]; then
                # Disable log message for BUG713295
                # log "mounting as ext4"
                mount_ext4 "${dev}" "${part}" "${mount}" "${mountssd}"
            else
                # Skip log msg, generates too many messages, initdevblock too slow
                # log "mounting as other than vfat or ext4: '${fstype}'"
                mount -o noatime "/dev/${dev}${part}" "${mount}" 2>/dev/null
            fi
        fi

        # Record what was found next to the mount point.
        devid=$(get_devid "$dev")
        echo -e "devid='${devid}'\ndev='${dev}'\npart='${part}'\nfstype='${fstype}'" >"${mount}.conf"
        ;;
    remove)
        # Work on a snapshot of /proc/mounts, since the loop below changes
        # the mount table while it is being read.
        mtab="$(mktemp /tmp/tmp.XXXXXX)"
        cp /proc/mounts "${mtab}"
        while read dev mount x; do
            if [ "/dev/$1" = "${dev}" ]; then
                rm -f "${mount}.conf"
                umount -l "${mount}" 2>/dev/null
                rmdir "${mount}" 2>/dev/null || :
            fi
        done <"${mtab}"
        rm -f "${mtab}"
        ;;
esac
