#!/bin/bash
# Polish a draft assembly with medaka.
#
# Steps, all run from inside the output directory and each skipped when its
# output already exists (unless -f is given):
#   1. for RLE models, compress basecalls and draft with `medaka fastrle`;
#   2. align basecalls to the draft with `mini_align`;
#   3. run `medaka consensus` to produce consensus probabilities;
#   4. build the polished assembly with `medaka stitch`, or with
#      `medaka variant` + `bcftools consensus` when a VCF is requested.
#
# -e: abort as soon as a command fails.
# -o pipefail: a pipeline fails if any of its stages fails.
set -eo pipefail

# Print the canonical path of a file with all symbolic links resolved.
#
# Arguments: $1 - path to resolve (may be relative; may contain whitespace)
# Outputs:   the resolved path on stdout (as computed by os.path.realpath)
# Returns:   the exit status of the python interpreter
function follow_link {
  # "$1" is quoted so that a path containing spaces or glob characters
  # reaches python as exactly one argument (sys.argv[1]).
  python -c 'import os,sys; print(os.path.realpath(sys.argv[1]))' "$1"
}


# ---- Defaults; each can be overridden by a command-line option -------------
OUTPUT="medaka"     # -o  output folder
THREADS=1           # -t  worker threads
BATCH_SIZE=100      # -b  inference batch size
FORCE=false         # -f  overwrite existing outputs
VCFOUT=false        # -v  also emit a VCF / BED

# Become true once the corresponding required option has been seen.
iflag=false         # -i
dflag=false         # -d

# Version banner, reused in the help text and in the start-up message.
medaka_version=$(medaka --version)

# `medaka tools list_models` reports the available models on its first line
# and the default consensus model on its second line.
modeldata=()
while read -r model_line; do
    modeldata+=("$model_line")
done < <(medaka tools list_models)

# The default model name is the last space-separated word of the second line.
MODEL=${modeldata[1]##* }

# Help text, printed for -h and whenever a required option is missing.
# It is built after the defaults are known so that the advertised default
# model, thread count and batch size always match the values actually used.
# The synopsis line lists both required options (-i and -d).
usage="
${medaka_version}
------------

Assembly polishing via neural networks. The input assembly should be
preprocessed with racon.

$(basename "$0") [-h] -i <fastx> -d <fasta>

    -h  show this help text.
    -i  fastx input basecalls (required).
    -d  fasta input assembly (required).
    -o  output folder (default: medaka).
    -v  output vcf of changes, and bed file of polished regions.
        (note that this slows down consensus calculation).
    -m  medaka model, (default: ${MODEL}).
        ${modeldata[0]}.
        Alternatively a .hdf file from 'medaka train'.
    -f  Force overwrite of outputs (default will reuse existing outputs).
    -t  number of threads with which to create features (default: ${THREADS}).
    -b  batchsize, controls memory use (default: ${BATCH_SIZE})."


# Parse the command-line options into the script's global settings.
#
# Globals written: iflag, dflag, BASECALLS, DRAFT, OUTPUT, VCFOUT, MODEL,
#                  FORCE, THREADS, BATCH_SIZE (and getopts' own OPTIND/OPTARG)
# Globals read:    usage
# Arguments:       the script's argument list ("$@")
# Exits:           0 after printing help for -h; 1 on an unknown option or
#                  an option that is missing its argument.
parse_args() {
  local option
  # Leading ':' selects silent error reporting so the '?' and ':' arms below
  # produce the messages themselves.
  while getopts ':hi:d:o:vm:ft:b:' option; do
    case "$option" in
      h  ) echo "$usage" >&2; exit;;
      # Input paths are resolved to canonical paths here because the script
      # later changes directory into the output folder.
      i  ) iflag=true; BASECALLS=$(follow_link "$OPTARG");;
      d  ) dflag=true; DRAFT=$(follow_link "$OPTARG");;
      o  ) OUTPUT=$OPTARG;;
      v  ) VCFOUT=true;;
      # OPTARG is quoted: a model given as a file path may contain spaces.
      m  ) MODEL=$(medaka tools resolve_model --model "$OPTARG");;
      f  ) FORCE=true;;
      t  ) THREADS=$OPTARG;;
      b  ) BATCH_SIZE=$OPTARG;;
      \? ) echo "Invalid option: -${OPTARG}." >&2; exit 1;;
      :  ) echo "Option -$OPTARG requires an argument." >&2; exit 1;;
    esac
  done
}
parse_args "$@"
# OPTIND is left global by parse_args, so the consumed options can be dropped
# from the script's own positional parameters.
shift $((OPTIND - 1))

# Both inputs are mandatory. If either flag was never set by the option
# parser, show the help text followed by a one-line reason on stderr and
# stop with status 1.
$iflag || {
  {
    echo "$usage"
    echo ""
    echo "-i must be specified."
  } >&2
  exit 1
}

$dflag || {
  {
    echo "$usage"
    echo ""
    echo "-d must be specified."
  } >&2
  exit 1
}

# Report tool versions; stop if the version report fails.
echo "Checking program versions"
echo "This is ${medaka_version}"
medaka_version_report || exit 1

# Create the output folder on first use; otherwise tell the user whether
# existing results will be overwritten (-f) or may be reused.
# OUTPUT is user supplied, so it is quoted everywhere (it may contain spaces)
# and preceded by '--' (it may start with a dash).
if [[ ! -e "${OUTPUT}" ]]; then
  mkdir -p -- "${OUTPUT}"
elif ${FORCE}; then
  echo "Warning: Output will be overwritten (-f flag)"
else
  echo "Warning: Output ${OUTPUT} already exists, may use old results."
fi

# All later steps use file names relative to the output folder.
cd -- "${OUTPUT}"

# Determine whether the selected model is an RLE model and, if it is, make
# the pipeline operate on `medaka fastrle`-compressed copies of the inputs.
#
# Globals read:    MODEL, FORCE, BASECALLS, DRAFT
# Globals written: rflag (true/false), COMPRESSED_BASECALLS, COMPRESSED_DRAFT,
#                  BASECALLS, DRAFT, FORCE
# Outputs:         progress messages on stdout; for RLE models the files
#                  basecalls.fastrle.gz and draft.fastrle.gz in the cwd.
prepare_rle_inputs() {
  # `medaka tools is_rle_model` prints "True" for RLE models; normalise that
  # to a shell true/false command name so it can be used as `if $rflag`.
  rflag=$(medaka tools is_rle_model --model "${MODEL}")
  if [[ ${rflag} == "True" ]]; then
    rflag=true
  else
    rflag=false
  fi

  if $rflag; then
    COMPRESSED_BASECALLS=basecalls.fastrle.gz
    COMPRESSED_DRAFT=draft.fastrle.gz
    if [[ ! -e ${COMPRESSED_BASECALLS} ]] || [[ ! -e ${COMPRESSED_DRAFT} ]] || ${FORCE}; then
      echo "Compressing draft and basecalls."

      medaka fastrle "${BASECALLS}" | bgzip > "${COMPRESSED_BASECALLS}"
      medaka fastrle "${DRAFT}" | bgzip > "${COMPRESSED_DRAFT}"

      # Inputs were regenerated, so everything downstream must be redone.
      FORCE=true
    else
      echo "Not compressing basecalls and draft, ${COMPRESSED_BASECALLS} and ${COMPRESSED_DRAFT} exist."
    fi

    # Point the rest of the pipeline at the compressed files in BOTH cases.
    # When existing compressed files are reused, a later step that still has
    # to run (e.g. alignment) must not fall back to the uncompressed inputs.
    BASECALLS=${COMPRESSED_BASECALLS}
    DRAFT=${COMPRESSED_DRAFT}
  fi
}
prepare_rle_inputs

# Step: align the basecalls to the draft, producing ${CALLS2DRAFT}.bam.
# Skipped when the BAM already exists, unless FORCE is set (by -f or because
# an earlier step regenerated its output).
CALLS2DRAFT=calls_to_draft
ALIGN_PARAMS=$(medaka tools get_alignment_params --model "${MODEL}")
if [[ ! -e ${CALLS2DRAFT}.bam ]] || ${FORCE}; then
    echo "Aligning basecalls to draft"
    # ALIGN_PARAMS is deliberately left unquoted: it is passed through as
    # separate words (it may expand to several options, or to nothing).
    # The failure handler uses a brace group, not a subshell, so that `exit`
    # terminates the script itself instead of relying on `set -e`.
    # shellcheck disable=SC2086
    mini_align -i "${BASECALLS}" -r "${DRAFT}" -p "${CALLS2DRAFT}" -t "${THREADS}" -m -f ${ALIGN_PARAMS} \
      || { echo "Failed to run alignment of reads to draft."; exit 1; }
    # The alignment changed, so later steps must be redone.
    FORCE=true
else
    echo "Not aligning basecalls to draft, ${CALLS2DRAFT}.bam exists."
fi

# Step: run `medaka consensus` on the alignment, writing ${CONSENSUSPROBS}.
# Skipped when the file already exists, unless FORCE is set.
CONSENSUSPROBS=consensus_probs.hdf
if [[ ! -e ${CONSENSUSPROBS} ]] || ${FORCE}; then
    echo "Running medaka consensus"
    # Remove any previous output before regenerating it.
    rm -rf -- "${CONSENSUSPROBS}"
    # MODEL is quoted because it may be a path to an .hdf file.
    # The failure handler uses a brace group, not a subshell, so that `exit`
    # terminates the script itself instead of relying on `set -e`.
    medaka consensus "${CALLS2DRAFT}.bam" "${CONSENSUSPROBS}" \
        --model "${MODEL}" --batch_size "${BATCH_SIZE}" --threads "${THREADS}" \
        || { echo "Failed to run medaka consensus."; exit 1; }
    # Probabilities changed, so the final consensus must be rebuilt.
    FORCE=true
else
    echo "Not running medaka consensus, ${CONSENSUSPROBS} exists."
fi

# Step: turn the consensus probabilities into the polished assembly.
#   * default, and always for RLE models: `medaka stitch`;
#   * with -v on a non-RLE model: `medaka variant` to call a VCF, then
#     `bcftools consensus` to apply it to the draft, plus a BED of the
#     polished regions.
# Skipped when ${CONSENSUS} already exists, unless FORCE is set.
VCF_OUTPUT=variants.vcf
CONSENSUS=consensus.fasta
CHAIN=draft_to_consensus.chain
WASPOLISHEDBED=polished_regions_in_draft_coords.bed
if [[ ! -e ${CONSENSUS} ]] || ${FORCE}; then
    if ! ${VCFOUT} || $rflag; then
        echo "Using medaka stitch to create consensus."
        # Extra stitch arguments are kept in an array so that a draft path
        # containing spaces is passed as a single argument.
        if $rflag; then
            # filling gaps with draft sequence won't currently work with rle
            STITCH_OPTS=()
        else
            STITCH_OPTS=(--draft "${DRAFT}")
            GAP_BED="${CONSENSUS}.gaps_in_draft_coords.bed"
        fi

        # Stitching uses at most MAXJOBS threads, fewer if -t is smaller.
        MAXJOBS=8
        JOBS=$(( MAXJOBS < THREADS ? MAXJOBS : THREADS ))
        # Brace group (not a subshell) so that `exit` ends the script itself.
        medaka stitch --threads "${JOBS}" "${CONSENSUSPROBS}" "${CONSENSUS}" "${STITCH_OPTS[@]}" \
            || { echo "Failed to stitch consensus chunks."; exit 1; }

        if ! $rflag; then
            echo "BED delineating regions which were not polished (in draft coordinates): ${OUTPUT}/${GAP_BED}"
        fi

    else
        echo "Using medaka variant to create vcf and apply to draft."
        # Clear outputs of any previous run before regenerating them.
        rm -rf -- "${VCF_OUTPUT}" "${VCF_OUTPUT}.gz" "${CHAIN}" "${WASPOLISHEDBED}"
        medaka variant "${DRAFT}" "${CONSENSUSPROBS}" "${VCF_OUTPUT}" \
             || { echo "Failed to call variants."; exit 1; }
        echo "Applying variants"
        # Compress and index the VCF, then apply it to the draft.
        bgzip "${VCF_OUTPUT}"
        tabix -p vcf "${VCF_OUTPUT}.gz"
        bcftools consensus -f "${DRAFT}" --sample SAMPLE -c "${CHAIN}" "${VCF_OUTPUT}.gz" > "${CONSENSUS}"
        echo "VCF with changes made by medaka: ${OUTPUT}/${VCF_OUTPUT}.gz"
        medaka tools hdf_to_bed "${CONSENSUSPROBS}" "${WASPOLISHEDBED}"
        echo "BED delineating regions which were polished (in draft coordinates): ${OUTPUT}/${WASPOLISHEDBED}"
    fi
    echo "Polished assembly written to ${OUTPUT}/${CONSENSUS}, have a nice day."
    FORCE=true
else
    echo "Consensus ${OUTPUT}/${CONSENSUS} exists, remove ${OUTPUT} and try again."
fi
