
Commit 9028d74

[fix]q_nope shape

1 parent: fddc5c6

File tree

2 files changed (+2, -2 lines)

lightllm/models/deepseek2/layer_infer/transformer_layer_infer.py

Lines changed: 1 addition & 1 deletion

@@ -567,7 +567,7 @@ def _token_gqa_decode_attention_mtp(
 q=q_rope.reshape(-1, self.tp_q_head_num_ * self.mtp_size, self.qk_rope_head_dim),
 k_cache=k_rope,
 v_cache=kv_nope,
-qv=q_nope.reshape(-1, self.tp_q_head_num_ * self.mtp_size, self.qk_nope_head_dim),
+qv=q_nope.reshape(-1, self.tp_q_head_num_ * self.mtp_size, self.kv_lora_rank),
 page_table=infer_state.page_table[self.mtp_size - 1 :: self.mtp_size],
 cache_seqlens=infer_state.b_seq_len[self.mtp_size - 1 :: self.mtp_size],
 cu_seqlens_q=infer_state.cu_seqlens_q,
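Why kv_lora_rank: in the absorbed MLA decode path, q_nope has presumably already been projected per head into the compressed KV (latent) space, so its per-head width is kv_lora_rank, the same width as the cached kv_nope passed as v_cache, not the raw qk_nope_head_dim. The sketch below shows the shape consequence of the old reshape; the tensor and attribute names mirror the diff, but the concrete sizes are illustrative DeepSeek-V2-style values and the standalone torch code is an assumption for illustration, not code from the repository.

import torch

# Illustrative sizes (DeepSeek-V2-style); assumptions for the sketch,
# not values read from the repository.
tokens, tp_q_head_num, mtp_size = 8, 16, 2
qk_nope_head_dim, kv_lora_rank = 128, 512

# After weight absorption, q_nope lives in the compressed KV (latent) space,
# so its last dimension is kv_lora_rank.
q_nope = torch.randn(tokens * mtp_size, tp_q_head_num, kv_lora_rank)

# Old reshape: assumes the per-head width is still qk_nope_head_dim. Because
# 512 is a multiple of 128, the reshape "succeeds", but the leading (row)
# dimension silently inflates and no longer matches the query rows.
wrong = q_nope.reshape(-1, tp_q_head_num * mtp_size, qk_nope_head_dim)
print(wrong.shape)  # torch.Size([32, 32, 128])

# Fixed reshape: the last dimension is kv_lora_rank, matching the compressed
# kv_nope cache consumed by the attention call, and the row count stays 8.
qv = q_nope.reshape(-1, tp_q_head_num * mtp_size, kv_lora_rank)
print(qv.shape)  # torch.Size([8, 32, 512])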

lightllm/models/deepseek2/model.py

Lines changed: 1 addition & 1 deletion

@@ -69,7 +69,7 @@ def __init__(self, kvargs):
         return

     def _init_inferstate_cls(self):
-        if get_env_start_args().enable_fa3:
+        if get_env_start_args().enable_fa3 or get_env_start_args().enable_fa3_mtp:
             self.infer_state_class = Deepseek2FlashAttentionStateInfo
         elif self.enable_flashinfer:
             self.infer_state_class = Deepseek2FlashInferStateInfo
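The second change routes MTP FlashAttention-3 decoding to the FA3-aware inferstate class as well. Before the fix, passing only enable_fa3_mtp fell through to the flashinfer or default branch, even though the MTP decode attention above presumably needs the FA3 state. Below is a minimal sketch of the dispatch after the commit, using only the flag and class names visible in the diff; the standalone function and the default class name in it are simplifications for illustration, not the repository method.

def pick_inferstate_cls(enable_fa3: bool, enable_fa3_mtp: bool, enable_flashinfer: bool) -> str:
    # Mirrors _init_inferstate_cls after the fix: either FA3 flag selects the
    # FlashAttention state class; otherwise flashinfer, if enabled.
    if enable_fa3 or enable_fa3_mtp:
        return "Deepseek2FlashAttentionStateInfo"
    if enable_flashinfer:
        return "Deepseek2FlashInferStateInfo"
    return "<default inferstate>"  # hypothetical placeholder for the sketch

# With only the MTP flag set, the FA3 state class is now chosen.
print(pick_inferstate_cls(enable_fa3=False, enable_fa3_mtp=True, enable_flashinfer=False))
# -> Deepseek2FlashAttentionStateInfo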
