From 9f34fbf84ec1c075168deba58933a7f45d513f5a Mon Sep 17 00:00:00 2001
From: "qingnang.lh"
Date: Thu, 4 Dec 2025 14:07:46 +0800
Subject: [PATCH] support LoRA method

---
 F2LLM/README.md                | 22 ++++++++++++++++++++++
 F2LLM/arguments.py             |  6 ++++++
 F2LLM/configs/config_lora.json | 24 ++++++++++++++++++++++++
 F2LLM/model.py                 | 29 ++++++++++++++++++++++++++++-
 F2LLM/requirements.txt         | 15 ++++++++-------
 F2LLM/run.py                   |  2 ++
 6 files changed, 90 insertions(+), 8 deletions(-)
 create mode 100644 F2LLM/configs/config_lora.json

diff --git a/F2LLM/README.md b/F2LLM/README.md
index 6b79819..4375076 100644
--- a/F2LLM/README.md
+++ b/F2LLM/README.md
@@ -42,6 +42,28 @@ where N_NODE is the number of machines; N_PROCESSES is N_NODE\*8; MASTER_IP is t
 
 On worker nodes, also run the above commmand but modify `machine_rank` accordingly.
 
+### LoRA Training
+
+This repository now supports Parameter-Efficient Fine-Tuning (PEFT) with LoRA (Low-Rank Adaptation), which significantly reduces the number of trainable parameters and the optimizer memory required during training.
+
+To use LoRA training:
+
+1. Add LoRA parameters to your config file (see `configs/config_lora.json` for an example):
+   ```json
+   {
+     "use_lora": true,
+     "lora_r": 8,
+     "lora_alpha": 32,
+     "lora_dropout": 0.1,
+     "lora_target_modules": "q_proj,v_proj"
+   }
+   ```
+
+2. Run training with the LoRA config:
+   ```
+   accelerate launch --config_file configs/accelerate_config.yaml run.py --config configs/config_lora.json
+   ```
+
 ### Citation
 
 If you use the F2LLM models, data, or code, please cite the following technical report.
diff --git a/F2LLM/arguments.py b/F2LLM/arguments.py
index b967c8f..45e756f 100644
--- a/F2LLM/arguments.py
+++ b/F2LLM/arguments.py
@@ -27,6 +27,12 @@ class Args:
     log_interval: int = 20
     checkpointing_steps: int = 100
     validation_steps: int = 100
+    # LoRA settings
+    use_lora: bool = False
+    lora_r: int = 8
+    lora_alpha: int = 32
+    lora_dropout: float = 0.1
+    lora_target_modules: str = "q_proj,v_proj"
     # just placeholder, for logging purpose
     num_processes: int=0
 
diff --git a/F2LLM/configs/config_lora.json b/F2LLM/configs/config_lora.json
new file mode 100644
index 0000000..e92a15d
--- /dev/null
+++ b/F2LLM/configs/config_lora.json
@@ -0,0 +1,24 @@
+{
+    "model_path": "models/qwen3-4b",
+    "experiment_id": "4b+lr.8e-6+bs.16x32+context.1024+2epochs+lora",
+    "train_data_path": "training_data/data_tokenized_qwen",
+    "output_dir": "output",
+    "tb_dir": "output/tb",
+    "cache_dir": "cache",
+    "train_batch_size": 16,
+    "checkpointing_steps": 5000,
+    "validation_steps": 5000,
+    "max_seq_length": 1024,
+    "learning_rate": 8e-6,
+    "min_lr": 1e-7,
+    "weight_decay": 0.01,
+    "warmup_steps": 500,
+    "train_epochs": 2,
+    "log_interval": 100,
+    "num_hard_neg": 7,
+    "use_lora": true,
+    "lora_r": 8,
+    "lora_alpha": 32,
+    "lora_dropout": 0.1,
+    "lora_target_modules": "q_proj,v_proj"
+}
\ No newline at end of file
diff --git a/F2LLM/model.py b/F2LLM/model.py
index d33ade7..0ea4795 100644
--- a/F2LLM/model.py
+++ b/F2LLM/model.py
@@ -1,5 +1,6 @@
 import torch
 from transformers import AutoModel, AutoTokenizer
+from peft import get_peft_model, LoraConfig, TaskType
 
 
 class F2LLM:
@@ -16,6 +17,33 @@ def __init__(self,
         self.lm.config.use_cache = False
         self.tokenizer = AutoTokenizer.from_pretrained(model_path)
         self.max_seq_length = max_seq_length
+
+        # Apply LoRA if enabled
+        if args and args.use_lora:
+            self._apply_lora()
+
+            # Enable gradient requirements for LoRA with flash attention
+            if hasattr(self.lm, 'enable_input_require_grads'):
+                self.lm.enable_input_require_grads()
+
+    def _apply_lora(self):
"""Apply LoRA adaptation to the model""" + # Print LoRA training message + print("Using LoRA training, optimizing only LoRA parameters") + + target_modules = self.args.lora_target_modules.split(",") if self.args.lora_target_modules else None + + peft_config = LoraConfig( + task_type=TaskType.CAUSAL_LM, # For decoder-only models + inference_mode=False, + r=self.args.lora_r, + lora_alpha=self.args.lora_alpha, + lora_dropout=self.args.lora_dropout, + target_modules=target_modules + ) + + self.lm = get_peft_model(self.lm, peft_config) + self.lm.print_trainable_parameters() def set_device(self): self.device = self.lm.device @@ -34,4 +62,3 @@ def forward(self, batch): 'passage_passage_features': torch.stack([passage_features_all_tokens[i, [batch['seq_lens'][i]-1]] for i in range(bs, 2*bs)]), 'negative_passage_features': None if num_hard_neg == 0 else torch.stack([passage_features_all_tokens[i, [batch['seq_lens'][i]-1]] for i in range(2*bs, len(batch['seq_lens']))]).view(bs, num_hard_neg, -1) } - diff --git a/F2LLM/requirements.txt b/F2LLM/requirements.txt index 82fb447..365ddf8 100644 --- a/F2LLM/requirements.txt +++ b/F2LLM/requirements.txt @@ -1,7 +1,8 @@ -accelerate -datasets -deepspeed -flash-attn -torch -transformers -tensorboard +accelerate==1.3.0 +datasets==2.21.0 +deepspeed==0.16.2 +flash-attn==2.3.6+pack.glm.mask +torch==2.4.0+cu124 +transformers==4.51.0 +tensorboard==2.20.0 +peft==0.3.0 diff --git a/F2LLM/run.py b/F2LLM/run.py index e40b707..933cecb 100644 --- a/F2LLM/run.py +++ b/F2LLM/run.py @@ -124,6 +124,8 @@ def __iter__(self): # set seed again to make sure that different models share the same seed set_seed(0) +if args.use_lora: + accelerator.print("Using LoRA training, optimizing only LoRA parameters") optimizer = AdamW(model.lm.parameters(), weight_decay=args.weight_decay, lr=args.learning_rate,