input_ids [65326, 108258, 67164, 130001, 130004, 5, 80842, 6, 87361, 66810, 64709, 6, 66284, 66810, 64165, 64426, 65120, 68938, 85536, 11943, 63850, 67077, 63823, 72457, 63826, 65077, 63891, 64219, 64884, 6, 69735, 63963, 6, 66975, 64279, 104030, 69009, 65114, 64572, 63823, 130005, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3]
inputs 问题,回答。
labelids [-100, -100, -100, -100, 130004, 5, 80842, 6, 87361, 66810, 64709, 6, 66284, 66810, 64165, 64426, 65120, 68938, 85536, 11943, 63850, 67077, 63823, 72457, 63826, 65077, 63891, 64219, 64884, 6, 69735, 63963, 6, 66975, 64279, 104030, 69009, 65114, 64572, 63823, 130005, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100]
labels <image-100>
OS: Ubuntu 20.04 Python: 3.8 Transformers: 4.26.1 PyTorch: 1.12 CUDA Support: True
bash train.sh
train.sh如下 PRE_SEQ_LEN=128 LR=2e-2
CUDA_VISIBLE_DEVICES=1 python3 main.py \ --do_train \ --train_file mydata/train.json \ --validation_file mydata/dev.json \ --prompt_column content \ --response_column summary \ --overwrite_cache \ --model_name_or_path THUDM/chatglm-6b \ --output_dir output/adgen-chatglm-6b-pt-12-$PRE_SEQ_LEN-$LR \ --overwrite_output_dir \ --max_source_length 64 \ --max_target_length 128 \ --per_device_train_batch_size 1 \ --per_device_eval_batch_size 1 \ --gradient_accumulation_steps 16 \ --predict_with_generate \ --max_steps 300 \ --logging_steps 50 \ --save_steps 10 \ --learning_rate $LR \ --pre_seq_len $PRE_SEQ_LEN \ --quantization_bit 4
Environment- OS:
- Python:
- Transformers:
- PyTorch:
- CUDA Support (`python -c "import torch; print(torch.cuda.is_available())"`) :