-
Notifications
You must be signed in to change notification settings - Fork 1.3k
/
run_finetuning.sh
64 lines (52 loc) · 2.91 KB
/
run_finetuning.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
# Launches finetune.py for one downstream task.
#
# Usage:
#   run_finetuning.sh TASK_NAME CONF_FILE VOCAB_PATH ERNIE_VIL_CONFIG PRETRAIN_MODELS
#
#   TASK_NAME         passed as --task_name; also selects
#                     ./conf/TASK_NAME/task_TASK_NAME.json
#   CONF_FILE         shell file sourced below to define the training settings
#   VOCAB_PATH        passed as --vocab_path
#   ERNIE_VIL_CONFIG  passed as --ernie_config_path
#   PRETRAIN_MODELS   passed as --init_checkpoint
set -eu

# Without this check a missing argument only produces a bare
# "unbound variable" message from `set -u`.
if [ "$#" -lt 5 ]; then
  echo "Usage: $0 TASK_NAME CONF_FILE VOCAB_PATH ERNIE_VIL_CONFIG PRETRAIN_MODELS" >&2
  exit 1
fi

# Trace every command from here on.
set -x
#bash -x ./env.sh
TASK_NAME=$1
CONF_FILE=$2
VOCAB_PATH=$3
ERNIE_VIL_CONFIG=$4
PRETRAIN_MODELS=$5

# Quoted so that a configuration path containing spaces or glob characters
# is sourced as a single file.
source "$CONF_FILE"
# Optional: configure the CUDA, cuDNN and NCCL library locations by
# uncommenting and adapting the two lines below.
#export LD_LIBRARY_PATH=/home/work/cuda-9.0/lib64:/home/work/cudnn/cudnn_v7/cuda/lib64:$LD_LIBRARY_PATH
#export LD_LIBRARY_PATH=./nccl_2.3.5/lib/:$LD_LIBRARY_PATH

# Lower-case the two boolean-like switches. Either may be unset, in which
# case use_experimental_executor falls back to True and use_fuse to False.
# NOTE(review): presumably both come from the sourced configuration file.
e_executor=$(echo ${use_experimental_executor-True} | tr 'A-Z' 'a-z')
use_fuse=$(echo ${use_fuse-False} | tr 'A-Z' 'a-z')

# FLAGS_* settings exported into the environment of child processes.
export FLAGS_fast_eager_deletion_mode=1 \
  FLAGS_eager_delete_tensor_gb=0.0 \
  FLAGS_fraction_of_gpu_memory_to_use=0.98

# The parameter-fusion flags are exported only when use_fuse is "true".
if [ "${use_fuse}" = "true" ]; then
  export FLAGS_fuse_parameter_memory_size=131072 \
    FLAGS_fuse_parameter_groups_size=10
fi
# Task-group description file for the selected task.
TASK_GROUP_JSON="./conf/${TASK_NAME}/task_${TASK_NAME}.json"

# Number of comma-separated entries in CUDA_VISIBLE_DEVICES
# (awk's split() yields 0 when the value is empty).
gpu_cnt=$(echo ${CUDA_VISIBLE_DEVICES} | awk '{ print split($0, ids, ",") }')
echo "gpu_cnt, ${gpu_cnt}"
# Run finetune.py in training mode (--do_train True, --do_test False,
# --is_distributed False) with the settings collected above.
#
# BATCH_SIZE is divided by gpu_cnt (integer division) before being passed as
# --batch_size. Optional settings fall back to the defaults written inline
# below when they are unset.
#
# gpu_cnt is 0 when CUDA_VISIBLE_DEVICES is empty, which would otherwise
# surface as an opaque "division by 0" arithmetic error. stderr of the test
# is silenced on purpose: a non-numeric gpu_cnt is reported by the message
# below rather than by `[`.
if ! [ "${gpu_cnt:-0}" -gt 0 ] 2>/dev/null; then
  echo "error: gpu_cnt must be a positive integer (got '${gpu_cnt-}');" \
    "set CUDA_VISIBLE_DEVICES to a comma-separated list of GPU ids" >&2
  exit 1
fi

# Every expansion is quoted so that an empty value stays an (empty) argument
# instead of vanishing and shifting the next flag into its place, and so that
# paths containing spaces are passed as a single argument.
python finetune.py --use_cuda "True" \
  --is_distributed "False" \
  --use_fast_executor "${e_executor-True}" \
  --nccl_comm_num "${nccl_comm_num:-1}" \
  --batch_size "$((BATCH_SIZE / gpu_cnt))" \
  --do_train "True" \
  --do_test "False" \
  --task_name "${TASK_NAME}" \
  --vocab_path "${VOCAB_PATH}" \
  --task_group_json "${TASK_GROUP_JSON}" \
  --lr_scheduler "${lr_scheduler}" \
  --decay_steps "${decay_steps-}" \
  --lr_decay_ratio "${lr_decay_ratio-0.1}" \
  --layer_decay_rate "${layer_decay_rate-0.0}" \
  --text_init_layers "${text_init_layers-18}" \
  --n_layers "${n_layers-30}" \
  --margin "${margin-0.3}" \
  --num_train_steps "${num_train_steps}" \
  --checkpoints "${output_model_path}" \
  --save_steps "${SAVE_STEPS}" \
  --init_checkpoint "${PRETRAIN_MODELS}" \
  --ernie_config_path "${ERNIE_VIL_CONFIG}" \
  --learning_rate "${LR_RATE}" \
  --warmup_steps "${WARMUP_STEPS}" \
  --weight_decay "${WEIGHT_DECAY:-0}" \
  --max_seq_len "${MAX_LEN}" \
  --skip_steps 10