#!/bin/bash
DIR=`pwd`
DATETIME=`date +'date_%y-%m-%d_time_%H-%M-%S'`

if [ -n "${MASTER_ADDR}" ]; then
    echo $MASTER_ADDR
    echo $MASTER_PORT
    echo $WORLD_SIZE
    echo $RANK
else
    # Single-node fallback: rendezvous on localhost with a quasi-random port
    # of the form 2XY15 (i.e. 20015-29915) to reduce collisions between runs.
    MASTER_ADDR=127.0.0.1
    MASTER_PORT=2$(($RANDOM % 10))$(($RANDOM % 10))15
    WORLD_SIZE=1
    RANK=0
fi
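
# On a multi-node cluster, the launcher is expected to export the rendezvous
# variables before this script runs; a hypothetical example for node 0 of a
# two-node job:
#   export MASTER_ADDR=10.0.0.1 MASTER_PORT=23415 WORLD_SIZE=2 RANK=0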

DISTRIBUTED_ARGS="--nproc_per_node 8 \
                  --nnodes ${WORLD_SIZE} \
                  --node_rank ${RANK} \
                  --master_addr ${MASTER_ADDR} \
                  --master_port ${MASTER_PORT}"

EXP_NAME=sft_v0.1
SAVE_NAME=sft_v0.1_ft_grad_ckpt

SAVE_PATH="./output/${SAVE_NAME}/"

max_length=2048
micro_batch_size=1
global_batch_size=256
gradient_accumulation_steps=4
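
# Sanity check (assumed convention, not enforced anywhere in this script): the
# effective global batch is micro_batch_size * nproc_per_node * nnodes *
# gradient_accumulation_steps, so 1 * 8 * 8 * 4 = 256 on an 8-node job, but
# only 1 * 8 * 1 * 4 = 32 under the single-node fallback above.
# global_batch_size itself is never passed to the trainer; it only documents
# the train_iters arithmetic below.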

# train_iters = total_data * train_epochs // global_batch_size
# 361481 * 3 / 256 = 4236
train_epochs=3
train_iters=4236
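
# Equivalent shell arithmetic, if you would rather derive train_iters than
# hard-code it (total_data is a hypothetical variable for the sample count):
#   total_data=361481
#   train_iters=$(( total_data * train_epochs / global_batch_size ))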

lr_warmup_iters=50
lr_decay_iters=`expr $train_iters - $lr_warmup_iters`

eval_iter=50
eval_interval=50
save_interval=500

mkdir -p ${SAVE_PATH}

options=" \
    --pretrained-ckpt MAGAer13/mplug-owl-llama-7b-pt \
    --seq-length ${max_length} \
    --micro-batch-size ${micro_batch_size} \
    --train-epochs ${train_epochs} \
    --num-warmup-steps ${lr_warmup_iters} \
    --num-training-steps ${train_iters} \
    --gradient-accumulation-steps ${gradient_accumulation_steps} \
    --lr 1e-5 \
    --min-lr 1e-6 \
    --eval-iters ${eval_iter} \
    --save-interval ${save_interval} \
    --save-path ${SAVE_PATH} \
    --clip-grad 1.0 \
    --weight-decay 0.0001 \
    --adam-beta1 0.9 \
    --adam-beta2 0.999 \
    --num-workers 32 \
    --gradient-checkpointing \
    --bf16"

multimodal_options=" \
    --mm-config configs/v0.yaml
    "

python -m torch.distributed.launch $DISTRIBUTED_ARGS ./pipeline/train.py "$@" ${options} ${multimodal_options} 2>&1 | tee ${SAVE_PATH}/train.log
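
# Note: torch.distributed.launch is deprecated in recent PyTorch releases in
# favor of torchrun. Assuming the trainer reads LOCAL_RANK from the
# environment (or still accepts --local_rank), an equivalent invocation is:
#   torchrun $DISTRIBUTED_ARGS ./pipeline/train.py "$@" ${options} ${multimodal_options}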