带有 embedding 同时训练的 LoRA 权重合并:合并后的模型再继续训练时,Loss 突然增加的问题与正确的合并方法
"""Merge a LoRA adapter that was trained together with the embeddings.

Background (translated from the original Chinese write-up): after merging a
LoRA adapter whose ``embed_tokens`` / ``lm_head`` were also trained (PEFT
``modules_to_save``), continuing training on the merged model made the loss
jump suddenly.  Two standard merge approaches turned out to be WRONG for this
case because they lose the retrained embedding information:

Wrong approach 1 — plain PEFT merge::

    from peft import PeftModel
    from transformers import AutoModelForCausalLM, AutoTokenizer

    base_model_path = "/root/private_data/models/deepseek-ai/6epoch-merged"
    adapter_path = "/root/private_data/output/sharedata/checkpoint-10500/"
    output_path = "/root/private_data/models/deepseek-ai/merged_correct"

    model = AutoModelForCausalLM.from_pretrained(
        base_model_path, torch_dtype="bfloat16", device_map="cuda:0")
    model = PeftModel.from_pretrained(model, adapter_path)
    model = model.merge_and_unload()
    model.save_pretrained(output_path)
    tokenizer = AutoTokenizer.from_pretrained(base_model_path)
    tokenizer.save_pretrained(output_path)

Wrong approach 2 — ``llamafactory-cli export``.  Notes from the original:
``--export_dir`` must be a NEW path, and never reuse an old checkpoint path
for a new training run, otherwise the LoRA weights will be misaligned.  With
``modules_to_save`` embeddings this export also drops information::

    llamafactory-cli export \
        --model_name_or_path /root/private_data/models/deepseek-ai/6epoch-merged \
        --adapter_name_or_path /root/private_data/output/binding_sft_short_part_004-005_lora64_epoch1/checkpoint-9000 \
        --template alpaca \
        --finetuning_type lora \
        --export_dir /root/private_data/models/deepseek-ai/6epoch-merged-lora64_data005 \
        --export_size 4 \
        --export_device auto \
        --export_legacy_format false

The manual, shard-by-shard merge below applies both the LoRA deltas
(``W + B @ A * scaling``) and the fully-saved ``modules_to_save`` tensors
(embed_tokens / lm_head), then rewrites ``config.json`` and the safetensors
index so the output directory is a self-contained model.
"""
import json
import os
import shutil

import torch
from safetensors.torch import load_file, save_file

# Key prefix PEFT puts in front of every adapter tensor name.
_PEFT_PREFIX = "base_model.model."


def _copy_config_files(base_model_path: str, output_merged_path: str) -> None:
    """Copy every non-weight file (config, tokenizer, ...) from the base
    model directory into the output directory, creating it if needed."""
    os.makedirs(output_merged_path, exist_ok=True)
    print(f"从基础模型复制配置文件...")
    for file in os.listdir(base_model_path):
        src = os.path.join(base_model_path, file)
        # Skip weight shards; adapter_config.json etc. from the adapter dir
        # are intentionally NOT copied — they are not part of a merged model.
        if os.path.isfile(src) and not any(
                file.endswith(ex) for ex in [".safetensors", ".bin", ".pt"]):
            shutil.copy2(src, os.path.join(output_merged_path, file))


def _update_vocab_size(output_merged_path: str, new_num_tokens: int) -> None:
    """Rewrite ``vocab_size`` in the copied config.json in place."""
    cfg_path = os.path.join(output_merged_path, "config.json")
    if not os.path.exists(cfg_path):
        print(f"⚠️ 警告: 未找到 config.json,无法更新 vocab_size")
        return
    # BUG FIX: the file must be opened read+write ("r+") for the
    # seek/dump/truncate rewrite; mode "r" raises on the write.
    with open(cfg_path, "r+") as f:
        config = json.load(f)
        config["vocab_size"] = new_num_tokens
        f.seek(0)
        json.dump(config, f, indent=2)
        f.truncate()
    print(f"已更新 config.json: vocab_size -> {new_num_tokens}")


def merge_full_optimized(base_model_path: str, adapter_path: str,
                         output_merged_path: str,
                         device: str = "cuda:0") -> None:
    """Merge a LoRA adapter (including ``modules_to_save`` embedding /
    lm_head tensors) into a sharded safetensors base model, shard by shard.

    Args:
        base_model_path: directory holding the base model's ``*.safetensors``
            shards plus its config / tokenizer files.
        adapter_path: PEFT checkpoint directory containing
            ``adapter_model.safetensors`` and ``adapter_config.json``.
        output_merged_path: destination directory (created if missing).
        device: torch device string used for the merge arithmetic.

    Raises:
        FileNotFoundError / KeyError: if the adapter files or the expected
            ``r`` / ``lora_alpha`` config entries are missing.
    """
    print(f"[START] 开始全量合并流程 (LoRA Embedding)...")

    # 1. Load adapter weights and derive the LoRA scaling factor.
    print(f"加载 Adapter 权重...")
    adapter_weights = load_file(
        os.path.join(adapter_path, "adapter_model.safetensors"), device=device)
    with open(os.path.join(adapter_path, "adapter_config.json"), "r") as f:
        lora_config = json.load(f)
    r = lora_config["r"]
    alpha = lora_config["lora_alpha"]
    scaling = alpha / r
    print(f"✅ LoRA 配置读取成功: r={r}, alpha={alpha}, scaling={scaling}")

    # 2. Prepare the output directory with the base model's non-weight files.
    _copy_config_files(base_model_path, output_merged_path)

    # 3. Core merge, processed one shard at a time to bound GPU memory.
    safetensors_files = sorted(
        f for f in os.listdir(base_model_path) if f.endswith(".safetensors"))
    weight_map = {}        # tensor name -> shard file, for the index json
    new_num_tokens = None  # set if embed_tokens is replaced (vocab resized)
    for st_file in safetensors_files:
        print(f"处理分片: {st_file}...")
        src_path = os.path.join(base_model_path, st_file)
        dst_path = os.path.join(output_merged_path, st_file)
        state_dict = load_file(src_path, device=device)
        for key in list(state_dict.keys()):
            # --- A. LoRA delta merge: W <- W + (B @ A) * scaling ---
            lora_a_key = f"{_PEFT_PREFIX}{key.replace('.weight', '.lora_A.weight')}"
            lora_b_key = f"{_PEFT_PREFIX}{key.replace('.weight', '.lora_B.weight')}"
            if lora_a_key in adapter_weights and lora_b_key in adapter_weights:
                W = state_dict[key].to(torch.bfloat16)
                A = adapter_weights[lora_a_key].to(torch.bfloat16)
                B = adapter_weights[lora_b_key].to(torch.bfloat16)
                state_dict[key] = W + (B @ A) * scaling
            # --- B. modules_to_save replacement (embed_tokens / lm_head) ---
            # BUG FIX: the trailing ".weight" must be replaced, not kept —
            # PEFT stores these tensors as
            # "base_model.model.<module>.modules_to_save.default.weight";
            # keeping ".weight" in the middle made the lookup never match,
            # so the retrained embeddings were silently dropped.
            embed_save_key = (
                f"{_PEFT_PREFIX}"
                f"{key.replace('.weight', '.modules_to_save.default.weight')}")
            if embed_save_key in adapter_weights:
                state_dict[key] = adapter_weights[embed_save_key].to(
                    state_dict[key].dtype)
                if "embed_tokens" in key:
                    # Row count of the new embedding = new vocabulary size.
                    new_num_tokens = state_dict[key].shape[0]
                print(f"已替换模块: {key}")
        # Persist this shard; safetensors requires contiguous CPU tensors.
        cpu_state_dict = {k: v.contiguous().cpu()
                          for k, v in state_dict.items()}
        save_file(cpu_state_dict, dst_path)
        for k in state_dict.keys():
            weight_map[k] = st_file
        del state_dict, cpu_state_dict
        torch.cuda.empty_cache()

    # 4. Update config.json if the vocabulary was resized.
    if new_num_tokens:
        _update_vocab_size(output_merged_path, new_num_tokens)

    # 5. Regenerate the sharded-checkpoint index.
    total_size = sum(
        os.path.getsize(os.path.join(output_merged_path, f))
        for f in safetensors_files)
    index_path = os.path.join(output_merged_path,
                              "model.safetensors.index.json")
    with open(index_path, "w") as f:
        json.dump({"metadata": {"total_size": total_size},
                   "weight_map": weight_map}, f, indent=2)

    print(f"✨ [DONE] 全量合并成功!输出目录: {output_merged_path}")
    print(f"请确认以下文件存在:")
    for f in ["config.json", "tokenizer.json", "tokenizer_config.json",
              "model.safetensors.index.json"]:
        p = os.path.join(output_merged_path, f)
        status = "✅" if os.path.exists(p) else "❌"
        print(f"  {status} {f}")


if __name__ == "__main__":
    # **CONFIG is dictionary unpacking: every key/value pair in the dict is
    # passed to merge_full_optimized as an individual keyword argument.
    CONFIG = {
        "base_model_path": "/root/private_data/models/deepseek-ai/6epoch-merged",
        "adapter_path": "/root/private_data/output/sharedata/checkpoint-10500/",
        "output_merged_path": "/root/private_data/models/deepseek-ai/6epoch-merged-sft",
        "device": "cuda:0",
    }
    merge_full_optimized(**CONFIG)