Commit 87a7af8

fix autotune and benchmark (#1037)

Authored by sufubao
Co-authored-by: sufubao <1411045825@qq.com>
1 parent: 7781fb7

2 files changed: +5 additions, -3 deletions

lightllm/common/triton_utils/autotuner.py
Lines changed: 3 additions & 1 deletion

```diff
@@ -144,7 +144,9 @@ def __call__(self, *args, **kwargs):
         if static_key not in self.cached_configs and autotune_level == AutotuneLevel.USE_AUTOTUNE_HIS_CONFIG:
             if (dist.is_initialized() and get_current_rank_in_node() == 0) or not dist.is_initialized():
                 logger.warning(
-                    f"No kernel config for {self.kernel_name} in {KernelConfigs.get_config_file_name(static_key)}",
+                    f"No kernel config for {self.kernel_name} in {KernelConfigs.get_config_file_name(static_key)},"
+                    f"the performance may be suboptimal!"
+                    f"You can use LIGHTLLM_TRITON_AUTOTUNE_LEVEL=1 to enable autotune.",
                 )
             self.cached_configs[static_key] = {}
```
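The expanded warning points users at the `LIGHTLLM_TRITON_AUTOTUNE_LEVEL` environment variable. Below is a minimal sketch of acting on that hint by setting the variable via `os.environ`. Only the variable name and the value `1` come from the diff; setting it from Python, and the assumption that it must be in place before lightllm reads it, are illustrative rather than documented lightllm behavior.

```python
# Hedged sketch: opt in to autotuning via the variable named in the new warning.
# LIGHTLLM_TRITON_AUTOTUNE_LEVEL and the value "1" come from the diff above;
# exporting it from Python, before lightllm reads it, is an assumption.
import os

os.environ["LIGHTLLM_TRITON_AUTOTUNE_LEVEL"] = "1"  # enable autotune, per the warning text

# ... import lightllm and run kernels only after the variable is set ...
```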

test/benchmark/kernel/benchmark_fused_moe_triton.py
Lines changed: 2 additions & 2 deletions

```diff
@@ -6,7 +6,7 @@
 import triton
 from transformers import AutoConfig
 from lightllm.common.fused_moe.topk_select import select_experts
-from lightllm.common.fused_moe.grouped_fused_moe import fused_experts_impl
+from lightllm.common.fused_moe.grouped_fused_moe import fused_experts
 
 
 def get_model_config(model_name: str, tp_size: int):
@@ -98,7 +98,7 @@ def fused_moe_lightllm_api(
     )
     use_fp8_w8a8 = use_fp8_w8a8
 
-    return fused_experts_impl(
+    return fused_experts(
         hidden_states=x,
         w1=w1,
         w2=w2,
```
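The benchmark now calls `fused_experts` instead of the old `fused_experts_impl`. For an out-of-tree script that must run against lightllm builds from both sides of this commit, one hedged way to absorb the rename is an import fallback, sketched below; the pattern is illustrative and assumes the two names are call-compatible, which the unchanged call site suggests but the diff does not state.

```python
# Hedged compatibility sketch: resolve the fused-MoE entry point under either
# name so a benchmark script runs before and after this rename. Assumes the
# two symbols take the same arguments, as the unchanged call site suggests.
try:
    from lightllm.common.fused_moe.grouped_fused_moe import fused_experts
except ImportError:
    # Older lightllm revisions exported only the _impl-suffixed name.
    from lightllm.common.fused_moe.grouped_fused_moe import fused_experts_impl as fused_experts
```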
