Skip to content

Commit 10adc44

Browse files
author
wangzaijun
committed
fix
1 parent 48fd3bb commit 10adc44

File tree

4 files changed

+4
-11
lines changed

4 files changed

+4
-11
lines changed

README.md

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -22,12 +22,11 @@ LightLLM is a Python-based LLM (Large Language Model) inference and serving fram
2222

2323
## Tech Blogs
2424
- [2025/11] 🚀 Prefix KV Cache Transfer between DP ranks is now supported! Check out the technical deep dive in our [blog post](https://light-ai.top/lightllm-blog/2025/11/18/dp_kv_fetch.html).
25-
- [2025/05] LightLLM paper on constrained decoding accepted by [ACL2025](https://arxiv.org/pdf/2506.03887) (Pre $^3$: Enabling Deterministic Pushdown Automata for Faster Structured LLM Generation). For a more accessible overview of the research with key insights and examples, check out our blog post: [LightLLM Blog](https://www.light-ai.top/lightllm-blog/2025/06/15/pre3.html)
2625

2726
## News
28-
2927
- [2025/09] 🔥 LightLLM [v1.1.0](https://www.light-ai.top/lightllm-blog/2025/09/03/lightllm.html) release!
3028
- [2025/08] Pre $^3$ wins the Outstanding Paper Award at [ACL2025](https://2025.aclweb.org/program/awards/).
29+
- [2025/05] LightLLM paper on constrained decoding accepted by [ACL2025](https://arxiv.org/pdf/2506.03887) (Pre $^3$: Enabling Deterministic Pushdown Automata for Faster Structured LLM Generation). For a more accessible overview of the research with key insights and examples, check out our blog post: [LightLLM Blog](https://www.light-ai.top/lightllm-blog/2025/06/15/pre3.html)
3130
- [2025/04] LightLLM paper on request scheduler published in [ASPLOS’25](https://dl.acm.org/doi/10.1145/3676641.3716011) (Past-Future Scheduler for LLM Serving under SLA Guarantees)
3231
- [2025/02] 🔥 LightLLM v1.0.0 release, achieving the **fastest DeepSeek-R1** serving performance on a single H200 machine.
3332

lightllm/server/router/model_infer/mode_backend/base_backend.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -213,6 +213,8 @@ def init_model(self, kvargs):
213213
self.args.run_mode in ["nixl_prefill", "nixl_decode", "prefill", "decode"]
214214
or self.args.enable_dp_prompt_cache_fetch
215215
):
216+
# 如果存在需要跨进程使用mem manager的特性,则将mem manager写入到 shm中,方便
217+
# 读取
216218
self.model.mem_manager.write_to_shm(req_manager=self.model.req_manager)
217219
dist.barrier(group=self.node_nccl_group)
218220

@@ -229,9 +231,6 @@ def init_model(self, kvargs):
229231
if self.args.mtp_mode:
230232
self.init_mtp_draft_model(kvargs)
231233

232-
# 如果存在需要跨进程使用mem manager的特性,则将mem manager写入到 shm中,方便
233-
# 读取
234-
235234
# 启动infer_loop_thread, 启动两个线程进行推理,对于具备双batch推理折叠的场景
236235
# 可以降低 cpu overhead,大幅提升gpu的使用率。
237236
self.infer_loop_thread = threading.Thread(target=self.infer_loop, daemon=True)

lightllm/server/router/model_infer/mode_backend/dp_backend/dp_shared_kv_trans.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
import dataclasses
55
import torch
66
from typing import List
7-
from lightllm.common.mem_manager import MemoryManager
7+
from lightllm.common.kv_cache_mem_manager import MemoryManager
88
from lightllm.utils.envs_utils import get_unique_server_name, get_env_start_args
99
from lightllm.utils.dist_utils import get_dp_rank_in_node
1010
from lightllm.server.core.objs.shm_array import ShmArray

lightllm/server/router/model_infer/mode_backend/dp_backend/impl.py

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,8 @@
11
import torch
22
import time
3-
import numpy as np
4-
import os
53
import torch.nn.functional as F
64
import torch.distributed as dist
75
from typing import List, Tuple, Optional, Callable
8-
from lightllm.common.kv_trans_kernel.kv_trans_v2 import kv_trans_for_dp
96
from lightllm.server.router.model_infer.mode_backend.base_backend import ModeBackend
107
from lightllm.common.basemodel.batch_objs import ModelOutput, ModelInput
118
from lightllm.server.router.model_infer.infer_batch import InferSamplingParams, g_infer_context, InferReq
@@ -26,8 +23,6 @@
2623
from lightllm.server.router.model_infer.pin_mem_manager import g_pin_mem_manager
2724
from lightllm.common.basemodel.triton_kernel.mtp_utils import mtp_scatter_next_token_ids
2825
from .control_state import DPControlState
29-
from lightllm.common.mem_manager import MemoryManager
30-
from .dp_shared_kv_trans import DPKVSharedMoudle
3126

3227

3328
class DPChunkedPrefillBackend(ModeBackend):

0 commit comments

Comments
 (0)