#!/bin/bash
# Launch DeepSpeed Megatron-LM GPT-2 pretraining with the given parallelism,
# model-size, and training settings.

helpFunction()
{
   echo ""
   echo "Usage: $0 -m model-parallelism -g gpus-per-node -n node-count -b batch-size -s steps -e heads [-l layers] [-h hidden-size] [-q seq-length] [-c ckpt-num-layers] [-o other-args] [-d ds-config-json] [-z] [-p master-port]"
   echo -e "\t-m model parallelism"
   echo -e "\t-g gpus per node"
   echo -e "\t-n node count"
   echo -e "\t-b batch size"
   echo -e "\t-s training steps"
   echo -e "\t-l layers"
   echo -e "\t-h hidden size"
   echo -e "\t-q sequence length"
   echo -e "\t-e attention heads"
   echo -e "\t-c checkpoint num_layers"
   echo -e "\t-o other args"
   echo -e "\t-d DeepSpeed config json file"
   echo -e "\t-z enable ZeRO optimization"
   echo -e "\t-p DeepSpeed master port"
   exit 1
}

# Defaults for the optional parameters
layers=2
hidden_size=128
seq_length=1024
ckpt_num_layers=1
other_args=""
ds_opt=""
zero_opt=""
master_port=29600

script_path=$(realpath "$0")
script_dir=$(dirname "$script_path")

while getopts "m:g:n:b:s:l:h:q:e:c:o:d:p:z" opt
do
   case "$opt" in
      m ) mp="$OPTARG" ;;
      g ) gpus="$OPTARG" ;;
      n ) nodes="$OPTARG" ;;
      b ) bs="$OPTARG" ;;
      s ) steps="$OPTARG" ;;
      l ) layers="$OPTARG" ;;
      h ) hidden_size="$OPTARG" ;;
      q ) seq_length="$OPTARG" ;;
      e ) heads="$OPTARG" ;;
      c ) ckpt_num_layers="$OPTARG" ;;
      p ) master_port="$OPTARG" ;;
      o ) other_args="$OPTARG" ;;
      d ) ds_opt="--deepspeed --deepspeed_config $script_dir/$OPTARG" ;;
      z ) zero_opt="--zero_optimization" ;;
      ? ) helpFunction ;;
   esac
done

# Print helpFunction if any required parameter is missing
if [ -z "$mp" ] || [ -z "$gpus" ] || [ -z "$nodes" ] || [ -z "$bs" ] || [ -z "$steps" ] || [ -z "$heads" ]
then
   echo "Some or all of the required parameters are empty"
   helpFunction
fi

# Change for multinode config
MASTER_ADDR=localhost
MASTER_PORT=6000

gpt_options=" \
       --model-parallel-size ${mp} \
       --num-layers ${layers} \
       --hidden-size ${hidden_size} \
       --num-attention-heads ${heads} \
       --batch-size ${bs} \
       --seq-length ${seq_length} \
       --max-position-embeddings ${seq_length} \
       --train-iters ${steps} \
       --train-data webtext \
       --lazy-loader \
       --tokenizer-type GPT2BPETokenizer \
       --split 949,50,1 \
       --distributed-backend nccl \
       --lr 0.00015 \
       --no-load-optim \
       --lr-decay-style cosine \
       --weight-decay 1e-2 \
       --clip-grad 1.0 \
       --warmup .01 \
       --checkpoint-activations \
       --checkpoint-num-layers ${ckpt_num_layers} \
       --fp16 \
       --cache-dir /tmp/cache_dir \
       --log-interval 1 \
       ${other_args} \
       ${ds_opt} \
       ${zero_opt} \
"

work_dir="../../../DeepSpeedExamples/Megatron-LM/"
run_cmd="(cd ${work_dir} && deepspeed --master_port ${master_port} --num_nodes $nodes --num_gpus $gpus pretrain_gpt2.py ${gpt_options})"
echo "${run_cmd}"
eval "${run_cmd}"

set +x
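
# Example invocation (a minimal sketch, not from the original script: the script
# filename, the DeepSpeed config filename, and every flag value below are
# hypothetical placeholders to illustrate the option parsing above):
#
#   bash run_gpt2.sh -m 2 -g 8 -n 1 -b 8 -s 100 \
#        -l 24 -h 1024 -q 1024 -e 16 -c 1 \
#        -d ds_config.json -z -p 29600
#
# Note that -d is resolved relative to the script's own directory, so the JSON
# config is expected to sit next to this script.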