1 parent be9be09 commit b141935
training/llama3.1/run-llama.sh
@@ -18,8 +18,10 @@ deepspeed --hostfile $HOSTFILE lora_script.py \
--model_name_or_path meta-llama/Meta-Llama-3.1-405B \
--tokenizer_name_or_path meta-llama/Meta-Llama-3.1-405B-Instruct \
--save_steps 200 --max_steps 1000 --max_train_samples 8000 \
+ --lora_r 64 --lora_alpha 64 \
--quantize --bits 8 \
--base_weight_sharding --offload --offload_ratio 0.75 \
--gradient_checkpointing --activation_checkpointing \
--learning_rate 2e-5 --lr_scheduler_type linear --warmup_ratio 0.02 \
--auth-token $HF_AUTH_TOKEN
+
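
The new --lora_r and --lora_alpha flags set the LoRA adapter rank and scaling factor for the fine-tuning run. As a rough sketch of how such flags are typically wired into a Hugging Face PEFT LoraConfig inside a launcher like lora_script.py (the argument parsing, dropout, and target_modules below are assumptions for illustration, not code from this repository):

import argparse
from peft import LoraConfig

# Hypothetical sketch only; the actual wiring in lora_script.py may differ.
parser = argparse.ArgumentParser()
parser.add_argument("--lora_r", type=int, default=64)      # LoRA adapter rank
parser.add_argument("--lora_alpha", type=int, default=64)  # scaling factor; the update is scaled by alpha / r
args, _ = parser.parse_known_args()

lora_config = LoraConfig(
    r=args.lora_r,
    lora_alpha=args.lora_alpha,
    lora_dropout=0.05,                    # illustrative value
    target_modules=["q_proj", "v_proj"],  # illustrative; depends on the model architecture
    task_type="CAUSAL_LM",
)

With r equal to alpha, as in this commit, the effective scaling alpha / r is 1, so the adapter update is applied at its nominal magnitude.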