Skip to content

Commit 2d56b82

Browse files
[CPU] Fix test/cpu/test_ipex_llm_quantization.py and test_ipex_optimize_transformers.py (#5016)
* fix test_ipex_llm_quantization.py
* fix test_ipex_optimize_transformers.py
* change int4 parameter order in test
1 parent 32b0bad commit 2d56b82

File tree

3 files changed

+2
-12
lines changed

3 files changed

+2
-12
lines changed

intel_extension_for_pytorch/llm/quantization/woq_linear.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -71,8 +71,8 @@ def from_weight(
7171
bias,
7272
group_size,
7373
g_idx,
74-
quant_method,
7574
dtype,
75+
quant_method,
7676
**kwargs,
7777
)
7878
return cls(woq_linear_impl)

intel_extension_for_pytorch/transformers/generation/sample.py

Lines changed: 0 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -90,16 +90,6 @@ def _sample(
9090
logits_warper = (
9191
logits_warper if logits_warper is not None else LogitsProcessorList()
9292
)
93-
pad_token_id = (
94-
pad_token_id
95-
if pad_token_id is not None
96-
else self.generation_config.pad_token_id
97-
)
98-
eos_token_id = (
99-
eos_token_id
100-
if eos_token_id is not None
101-
else self.generation_config.eos_token_id
102-
)
10393
if isinstance(eos_token_id, int):
10494
eos_token_id = [eos_token_id]
10595
eos_token_id_tensor = (

tests/gpu/examples/test_int4_linear.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -386,8 +386,8 @@ def test_awq_woqlinear_interface(
386386
None,
387387
group_size,
388388
g_idx4kernel,
389-
ipex.llm.quantization.QuantMethod.AWQ_GEMM,
390389
ipex.llm.quantization.QuantDtype.INT4,
390+
ipex.llm.quantization.QuantMethod.AWQ_GEMM,
391391
)
392392
out_xetla = woqlinear(input)
393393
out_torch = torch.matmul(input_torch, weight_fp16)

0 commit comments

Comments (0)