Goal
Dataset creation.
Base model choice.
Steps
1. Install packages
```
!pip install -qqq bitsandbytes==0.39.0
!pip install -qqq torch==2.0.1
!pip install -qqq -U git+https://github.com/huggingface/transformers.git@e03a9cc
!pip install -qqq -U git+https://github.com/huggingface/peft.git@42a184f
!pip install -qqq -U git+https://github.com/huggingface/accelerate.git@c9fbb71
!pip install -qqq datasets==2.12.0
!pip install -qqq loralib==0.1.1
!pip install -qqq einops==0.6.1
```
2. Import packages
```python
import pandas as pd
import json
import os
from pprint import pprint

import bitsandbytes as bnb
import torch
import torch.nn as nn
import transformers
from datasets import load_dataset, Dataset
from huggingface_hub import notebook_login
from peft import LoraConfig, PeftConfig, PeftModel, get_peft_model, prepare_model_for_kbit_training
from transformers import AutoConfig, AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

os.environ["CUDA_VISIBLE_DEVICES"] = "0,1"
```
3. Load the model in 4-bit
Original paper: https://arxiv.org/abs/2110.02861
Introduction to quantization: https://huggingface.co/blog/zh/hf-bitsandbytes-integration
Why quantization?
Large models need a lot of GPU memory to run, so we want techniques that lower the resource requirements while preserving the model's behavior.
Core bitsandbytes techniques used here: 4-bit NF4 quantization, double (nested) quantization of the quantization constants, and bfloat16 as the compute dtype.
```python
MODEL_NAME = 'model/phi-2/'

bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16
)

model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    device_map="auto",
    trust_remote_code=True,
    quantization_config=bnb_config
)
```
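As a quick sanity check that the 4-bit load worked, you can print the model's memory footprint (`get_memory_footprint` is a standard transformers model method; the exact number depends on the model revision and hardware):

```python
# Rough size of the quantized weights in GB (activations and optimizer state excluded)
print(f"Memory footprint: {model.get_memory_footprint() / 1e9:.2f} GB")
```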
4. Prepare the model, without gradient checkpointing
Gradient checkpointing: to reduce the memory taken up by activations, we can enable gradient checkpointing. The technique discards some intermediate results during the forward pass and keeps only the necessary information (the checkpoints), which lowers memory usage; during the backward pass, any discarded intermediates that are needed are recomputed. This trades extra compute for a large memory saving and makes it feasible to train deeper networks.
It is not enabled here.
```python
model = prepare_model_for_kbit_training(model, use_gradient_checkpointing=False)
```
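For reference, if you did want to trade extra compute for lower activation memory, the same call accepts the flag; a minimal variant, not used in this walkthrough:

```python
# Alternative: enable gradient checkpointing so activations are recomputed
# during the backward pass instead of being kept in memory.
model = prepare_model_for_kbit_training(model, use_gradient_checkpointing=True)
```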
5. Load the tokenizer
```python
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
tokenizer.pad_token = tokenizer.eos_token
```
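Many causal-LM tokenizers ship without a dedicated padding token, which is why it is aliased to the EOS token above; a one-line check confirms the assignment:

```python
# After the assignment, padded positions reuse the EOS token id
print(tokenizer.pad_token, tokenizer.pad_token_id == tokenizer.eos_token_id)
```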
6. Add the LoRA layers
```python
config = LoraConfig(
    r=2,
    lora_alpha=32,
    target_modules=get_last_layer_linears(model),  # helper not defined in this excerpt; see the sketch below
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM"
)
model = get_peft_model(model, config)
# model.to(device)  # not needed: device_map="auto" already placed the quantized model, and `device` is never defined above
```
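The config above calls a `get_last_layer_linears` helper that is not shown in this excerpt; it has to be defined before that cell runs. A minimal sketch, assuming the intent is to return the names of the `nn.Linear` modules in the model's last transformer block (the function name comes from the call above; the body is an assumption):

```python
import re

def get_num_layers(model):
    # Infer the highest block index from parameter names such as "...layers.31.mlp.fc1.weight"
    numbers = set()
    for name, _ in model.named_parameters():
        for number in re.findall(r"\d+", name):
            numbers.add(int(number))
    return max(numbers)

def get_last_layer_linears(model):
    # Collect the names of the Linear modules that belong to the last block only
    last_layer = get_num_layers(model)
    names = []
    for name, module in model.named_modules():
        if str(last_layer) in name and isinstance(module, nn.Linear):
            names.append(name)
    return names
```

After `get_peft_model`, calling `model.print_trainable_parameters()` is a convenient way to confirm that only a small fraction of the weights will actually be trained.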
7. Load the dataset
```python
def generate_prompt(data_point):
    return f"""{data_point["Question"]} . Answer as briefly as possible: {data_point["Answer"]}""".strip()


def generate_and_tokenize_prompt(tokenizer, data_point):
    full_prompt = generate_prompt(data_point)
    tokenized_full_prompt = tokenizer(full_prompt, padding=True, truncation=True)
    return tokenized_full_prompt


df = pd.read_csv("assets/JEOPARDY.csv")
df.columns = [str(q).strip() for q in df.columns]
data = Dataset.from_pandas(df)
data = data.shuffle().map(lambda x: generate_and_tokenize_prompt(tokenizer, x))
```
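To make the prompt format concrete, here is the string `generate_prompt` produces for a hypothetical row (the Question/Answer values are invented; only the column names come from the code above):

```python
example = {"Question": "This planet is known as the Red Planet", "Answer": "Mars"}
print(generate_prompt(example))
# This planet is known as the Red Planet . Answer as briefly as possible: Mars
```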
8. Start training
```python
training_args = transformers.TrainingArguments(
    per_device_train_batch_size=1,
    gradient_accumulation_steps=4,
    num_train_epochs=1,
    learning_rate=1e-4,
    fp16=True,
    output_dir="finetune_jeopardy",
    optim="paged_adamw_8bit",
    lr_scheduler_type="cosine",
    warmup_ratio=0.01,
    report_to="none"
)
trainer = transformers.Trainer(
    model=model,
    train_dataset=data,
    args=training_args,
    data_collator=transformers.DataCollatorForLanguageModeling(tokenizer, mlm=False)
)
model.config.use_cache = False
trainer.train()
```
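After training, a quick generation pass gives a rough feel for whether the adapter picked up the format. The prompt below is made up, and `use_cache` is switched back on since it was disabled for training:

```python
model.config.use_cache = True  # re-enable the KV cache for generation
model.eval()

prompt = "This planet is known as the Red Planet . Answer as briefly as possible:"
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
with torch.no_grad():
    outputs = model.generate(**inputs, max_new_tokens=10)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))
```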
9. Save the model
```python
model.save_pretrained("trained-model")
```
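`save_pretrained` on a PEFT model writes only the LoRA adapter weights, so for later use the base model is loaded again and the adapter attached on top of it. A minimal sketch using the `PeftModel` class imported earlier (paths are the ones used in this walkthrough):

```python
# Reload the 4-bit base model and attach the trained LoRA adapter
base_model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    device_map="auto",
    trust_remote_code=True,
    quantization_config=bnb_config,
)
trained_model = PeftModel.from_pretrained(base_model, "trained-model")
trained_model.eval()
```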