diff --git a/configs/kd_white_box.json b/configs/kd_white_box.json index 9cb13c8..ba53df3 100644 --- a/configs/kd_white_box.json +++ b/configs/kd_white_box.json @@ -19,7 +19,7 @@ "top_logits_num": 10 }, "distillation": { - "kd_ratio": 0.5, + "kd_ratio": 0.1, "max_seq_length": 512, "distillation_type": "forward_kld" }, @@ -32,6 +32,7 @@ "num_train_epochs": 3, "per_device_train_batch_size": 1, "gradient_accumulation_steps": 8, + "max_length": 512, "save_steps": 1000, "logging_steps": 1, "learning_rate": 2e-5,