# Example run: distill.py via `accelerate launch`, using
# --method forward_kl_text2text with --use_norm linear and --norm_epsilon 1e-6.
# CUDA_VISIBLE_DEVICES=0 limits the process to CUDA device 0.
# The /path/to/... values are placeholders; replace them before running.
# NOTE(review): the meaning of each distill.py flag is defined in distill.py,
# which is not shown here -- confirm the semantics of `linear` / epsilon there.
# The last flag intentionally has NO trailing backslash: a backslash there
# would splice the following line onto this command as extra arguments.
CUDA_VISIBLE_DEVICES=0 accelerate launch \
  --config_file configs/default_config.yaml \
  distill.py \
  --teacher_generation_dataset_path /path/to/teacher/dataset \
  --student_name /path/to/model \
  --output_dir /path/to/output/dir/ \
  --method forward_kl_text2text \
  --use_norm linear \
  --norm_epsilon 1e-6
# Example run: distill.py via `accelerate launch`, using
# --method forward_kl_text2text with --use_norm softmax and both
# --student_temp and --teacher_temp set to 1.0.
# CUDA_VISIBLE_DEVICES=0 limits the process to CUDA device 0.
# The /path/to/... values are placeholders; replace them before running.
# NOTE(review): presumably the two temps are softmax temperatures for the
# student and teacher distributions -- confirm against distill.py.
# The last flag intentionally has NO trailing backslash: a backslash there
# would splice the following line onto this command as extra arguments.
CUDA_VISIBLE_DEVICES=0 accelerate launch \
  --config_file configs/default_config.yaml \
  distill.py \
  --teacher_generation_dataset_path /path/to/teacher/dataset \
  --student_name /path/to/model \
  --output_dir /path/to/output/dir/ \
  --method forward_kl_text2text \
  --use_norm softmax \
  --student_temp 1.0 \
  --teacher_temp 1.0
# Example run: distill.py via `accelerate launch`, using
# --method forward_kl_text2text with --use_norm linear (--norm_epsilon 1e-6)
# plus label smoothing enabled with --smoothing_factor 0.1.
# CUDA_VISIBLE_DEVICES=0 limits the process to CUDA device 0.
# The /path/to/... values are placeholders; replace them before running.
# NOTE(review): `--user_label_smoothing` may be a typo for
# `--use_label_smoothing` (the other switches here are spelled `--use_norm`
# and `--use_other_token`). Left unchanged because distill.py's argument
# parser is not visible -- verify the exact flag name there.
# The last flag intentionally has NO trailing backslash: a backslash there
# would splice the following line onto this command as extra arguments.
CUDA_VISIBLE_DEVICES=0 accelerate launch \
  --config_file configs/default_config.yaml \
  distill.py \
  --teacher_generation_dataset_path /path/to/teacher/dataset \
  --student_name /path/to/model \
  --output_dir /path/to/output/dir/ \
  --method forward_kl_text2text \
  --use_norm linear \
  --norm_epsilon 1e-6 \
  --user_label_smoothing yes \
  --smoothing_factor 0.1
# Example run: distill.py via `accelerate launch`, using
# --method forward_kl_text2text with --use_other_token yes.
# CUDA_VISIBLE_DEVICES=0 limits the process to CUDA device 0.
# The /path/to/... values are placeholders; replace them before running.
# NOTE(review): what the "other token" option does is defined in distill.py,
# which is not shown here -- confirm before relying on it.
# The last flag intentionally has NO trailing backslash: a dangling
# continuation would absorb any line later added after this command.
CUDA_VISIBLE_DEVICES=0 accelerate launch \
  --config_file configs/default_config.yaml \
  distill.py \
  --teacher_generation_dataset_path /path/to/teacher/dataset \
  --student_name /path/to/model \
  --output_dir /path/to/output/dir/ \
  --method forward_kl_text2text \
  --use_other_token yes