add readme (#10)
* Update Readme.md (repeated across several intermediate commits)
* remove submodule
* add mPLUG MiniGPT4

---------

Co-authored-by: Yuliang Liu <34134635+Yuliang-Liu@users.noreply.github.com>
models/mPLUG_Owl/scripts/train_it.sh (new normal file, 72 lines)
@@ -0,0 +1,72 @@
#!/bin/bash
DIR=`pwd`
DATETIME=`date +'date_%y-%m-%d_time_%H-%M-%S'`

if [ $MASTER_ADDR ];then
    echo $MASTER_ADDR
    echo $MASTER_PORT
    echo $WORLD_SIZE
    echo $RANK
else
    MASTER_ADDR=127.0.0.1
    MASTER_PORT=2$(($RANDOM % 10))$(($RANDOM % 10))15
    WORLD_SIZE=1
    RANK=0
fi

DISTRIBUTED_ARGS="--nproc_per_node 8 \
                  --nnodes ${WORLD_SIZE} \
                  --node_rank ${RANK} \
                  --master_addr ${MASTER_ADDR} \
                  --master_port ${MASTER_PORT}"

EXP_NAME=sft_v0.1
SAVE_NAME=sft_v0.1_ft_grad_ckpt

SAVE_PATH="./output/${SAVE_NAME}/"

max_length=2048
micro_batch_size=4
global_batch_size=256
gradient_accumulation_steps=1

# train_iters = total_data * train_epochs // global_batch_size
# 361481 * 3 / 256 = 4236
train_epochs=3
train_iters=4236

lr_warmup_iters=50

eval_iter=50
eval_interval=50
save_interval=500

mkdir -p ${SAVE_PATH}

options=" \
    --pretrained-ckpt MAGAer13/mplug-owl-llama-7b-pt \
    --seq-length ${max_length} \
    --micro-batch-size ${micro_batch_size} \
    --num-training-steps ${train_iters} \
    --train-epochs ${train_epochs} \
    --num-warmup-steps ${lr_warmup_iters} \
    --gradient-accumulation-steps ${gradient_accumulation_steps} \
    --lr 2e-5 \
    --min-lr 1e-6 \
    --eval-iters ${eval_iter} \
    --save-interval ${save_interval} \
    --save-path ${SAVE_PATH} \
    --clip-grad 1.0 \
    --weight-decay 0.0001 \
    --adam-beta1 0.9 \
    --adam-beta2 0.999 \
    --num-workers 32 \
    --use-lora \
    --gradient-checkpointing \
    --bf16"

multimodal_options=" \
    --mm-config configs/v0.yaml
    "

python -m torch.distributed.launch $DISTRIBUTED_ARGS ./pipeline/train.py $@ ${options} ${multimodal_options} 2>&1 | tee ${SAVE_PATH}/train.log
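For orientation (not part of the committed file): a minimal sketch of how this script could be launched, assuming it is invoked from the models/mPLUG_Owl directory so that ./pipeline/train.py, configs/v0.yaml, and ./output/ resolve relative to the working directory:

    # single node: the script falls back to MASTER_ADDR=127.0.0.1, WORLD_SIZE=1, RANK=0
    cd models/mPLUG_Owl
    bash scripts/train_it.sh

    # multi-node (addresses are illustrative): export the rendezvous variables first,
    # then run the same command on every node with its own RANK
    MASTER_ADDR=10.0.0.1 MASTER_PORT=29500 WORLD_SIZE=2 RANK=0 bash scripts/train_it.sh

Any extra arguments given after the script name are forwarded to pipeline/train.py through $@.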
models/mPLUG_Owl/scripts/train_it_wo_lora.sh (new normal file, 72 lines)
@@ -0,0 +1,72 @@
#!/bin/bash
DIR=`pwd`
DATETIME=`date +'date_%y-%m-%d_time_%H-%M-%S'`

if [ $MASTER_ADDR ];then
    echo $MASTER_ADDR
    echo $MASTER_PORT
    echo $WORLD_SIZE
    echo $RANK
else
    MASTER_ADDR=127.0.0.1
    MASTER_PORT=2$(($RANDOM % 10))$(($RANDOM % 10))15
    WORLD_SIZE=1
    RANK=0
fi

DISTRIBUTED_ARGS="--nproc_per_node 8 \
                  --nnodes ${WORLD_SIZE} \
                  --node_rank ${RANK} \
                  --master_addr ${MASTER_ADDR} \
                  --master_port ${MASTER_PORT}"

EXP_NAME=sft_v0.1
SAVE_NAME=sft_v0.1_ft_grad_ckpt

SAVE_PATH="./output/${SAVE_NAME}/"

max_length=2048
micro_batch_size=1
global_batch_size=256
gradient_accumulation_steps=4

# train_iters = total_data * train_epochs // global_batch_size
# 361481 * 3 / 256 = 4236
train_epochs=3
train_iters=4236

lr_warmup_iters=50
lr_decay_iters=`expr $train_iters - $lr_warmup_iters`

eval_iter=50
eval_interval=50
save_interval=500

mkdir -p ${SAVE_PATH}

options=" \
    --pretrained-ckpt MAGAer13/mplug-owl-llama-7b-pt \
    --seq-length ${max_length} \
    --micro-batch-size ${micro_batch_size} \
    --train-epochs ${train_epochs} \
    --num-warmup-steps ${lr_warmup_iters} \
    --num-training-steps ${train_iters} \
    --gradient-accumulation-steps ${gradient_accumulation_steps} \
    --lr 1e-5 \
    --min-lr 1e-6 \
    --eval-iters ${eval_iter} \
    --save-interval ${save_interval} \
    --save-path ${SAVE_PATH} \
    --clip-grad 1.0 \
    --weight-decay 0.0001 \
    --adam-beta1 0.9 \
    --adam-beta2 0.999 \
    --num-workers 32 \
    --gradient-checkpointing \
    --bf16"

multimodal_options=" \
    --mm-config configs/v0.yaml
    "

python -m torch.distributed.launch $DISTRIBUTED_ARGS ./pipeline/train.py $@ ${options} ${multimodal_options} 2>&1 | tee ${SAVE_PATH}/train.log
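Compared with train_it.sh above, this variant drops --use-lora, lowers the learning rate from 2e-5 to 1e-5, and trades micro_batch_size=4 / gradient_accumulation_steps=1 for 1 / 4, so samples per optimizer step per node stay at 8 GPUs x micro_batch_size x gradient_accumulation_steps = 32 in both scripts (a reading of the settings above, not something stated in the commit; note that lr_decay_iters is computed here but not passed in options). The hard-coded train_iters follows the in-script comment, which the shell arithmetic below reproduces:

    # train_iters = total_data * train_epochs // global_batch_size (integer division)
    echo $(( 361481 * 3 / 256 ))   # prints 4236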