import torch
from peft import PeftModel
from transformers import Qwen2_5_VLForConditionalGeneration, AutoProcessor

# --- 1. Define your model paths ---
base_model_path = "Qwen/Qwen2.5-VL-3B-Instruct"  # The original student model
adapter_path = "/home/azureuser/finetuned_models/qwen2.5_vl/lora/Qwen2.5-VL-3B_distill_all_nolabel"  # The folder where your LoRA adapter was saved
merged_model_path = "/home/azureuser/finetuned_models/qwen2.5_vl/Qwen2.5-VL-3B_distill_merged_all_nolabel"  # Where to save the new, merged model

print("Loading base model...")
# --- 2. Load the base model ---
# Loading on the CPU keeps GPU memory free; the merge is a pure weight
# operation and does not need a GPU.
base_model = Qwen2_5_VLForConditionalGeneration.from_pretrained(
    base_model_path,
    torch_dtype=torch.bfloat16,
    trust_remote_code=True,
    device_map="cpu",
)

print("Loading LoRA adapter...")
# --- 3. Load the LoRA adapter onto the base model ---
model = PeftModel.from_pretrained(base_model, adapter_path)

print("Merging adapter into the base model...")
# --- 4. Merge the weights ---
# Combines the LoRA weights into the base model's layers.
model = model.merge_and_unload()

print(f"Saving merged model to {merged_model_path}...")
# --- 5. Save the new, standalone model ---
# The saved model is a standard Hugging Face model.
model.save_pretrained(merged_model_path)

# --- 6. Save the processor for easy use later ---
processor = AutoProcessor.from_pretrained(base_model_path, trust_remote_code=True)
processor.save_pretrained(merged_model_path)

print("Merge complete!")
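
# --- Optional: sanity-check the merged checkpoint ---
# A minimal sketch (not part of the original script): reloading from
# merged_model_path confirms the saved folder is a standalone, adapter-free
# model that loads with the standard Hugging Face API. The names `reloaded`
# and `reloaded_processor` are illustrative. Run separately if memory is tight.
reloaded = Qwen2_5_VLForConditionalGeneration.from_pretrained(
    merged_model_path,
    torch_dtype=torch.bfloat16,
    trust_remote_code=True,
    device_map="cpu",
)
reloaded_processor = AutoProcessor.from_pretrained(merged_model_path, trust_remote_code=True)
print("Reload check passed: merged model and processor load without the LoRA adapter.")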