train_emoon.sh

#!/bin/bash
#SBATCH --nodes=1
#SBATCH --ntasks=1
#SBATCH --cpus-per-task=8
#SBATCH --mem=100G
#SBATCH --partition=gpu_h100
#SBATCH --gres=gpu:1
#SBATCH --time=01-00:00:00
#SBATCH -o /scratch-shared/dwu18/cache/logs/out.emoon.%j.o
#SBATCH -e /scratch-shared/dwu18/cache/logs/out.emoon.%j.e
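# Runtime environment: CUDA-11 / Python-3.8 conda env, HuggingFace model cache
# on the shared work filesystem, and the pure-Python protobuf implementation.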
source activate py38cuda11
# source activate calibration
export HF_HUB_CACHE=/gpfs/work4/0/gus20642/dwu18/cache
export PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=python
################## MAIN ##################
LR=$1
SETTING=emoon_$LR
TEST_DATASET=enote_dataset
echo $LR
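# LoRA fine-tuning of Llama-2-7B on the enote dataset, restricted to the rule
# IDs below: 2 epochs, effective batch size 1 x 8 gradient-accumulation steps.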
python -m llama_recipes.finetuning --use_peft --peft_method lora \
    --model_name meta-llama/Llama-2-7b-hf \
    --output_dir ./checkpoints/7B/emoon/${SETTING} \
    --dataset enote_dataset \
    --rule_names "ryjlzs0001,ryjlzs0002,ryjlzs0004,ryjlzs0005,ryjlxbs0001,ryjljws0003,ryjljws0004,ryjljws0006,ryjljws0007,ryjljws0008,ryjljws0010" \
    --batching_strategy padding \
    --num_epochs 2 \
    --lr $LR \
    --batch_size_training 1 \
    --val_batch_size 1 \
    --gradient_accumulation_steps 8 \
    --use_wandb
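# For each saved epoch (only epoch 0 here), run beam-1, non-sampling inference
# with the trained LoRA adapter on a two-rule subset and write to results/.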
for EPOCH in 0; do
    BASE_SYS=results/emoon/${SETTING}-beam1/${EPOCH}
    python inference_formal.py --model_name meta-llama/Llama-2-7b-hf \
        --peft_model ./checkpoints/7B/emoon/${SETTING}/${EPOCH} \
        --dataset ${TEST_DATASET} \
        --val_batch_size 1 \
        --do_sample False \
        --output_dir ${BASE_SYS} \
        --rule_names "ryjlzs0001,ryjlzs0002" \
        --beam_size 1
done
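############## USAGE (sketch) ##############
# A minimal usage sketch, assuming the script is submitted from the repo root
# on the SLURM cluster configured above. The learning rate is the only
# positional argument ($1) and also names the run; "1e-4" below is purely an
# illustrative value, not a tuned setting.
#   sbatch train_emoon.sh 1e-4
# Checkpoints then land in ./checkpoints/7B/emoon/emoon_1e-4/<epoch>/ and
# inference outputs in results/emoon/emoon_1e-4-beam1/0/.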