From 72fd468ea900554c1432db29eb5322e51bcc55c9 Mon Sep 17 00:00:00 2001
From: Hanxian Huang
Date: Fri, 5 Dec 2025 11:54:25 -0800
Subject: [PATCH] support skip atten in export (#16104)

Summary:
Support export for llama model variants with attention layer skipping.
We only need to specify the attention skip pattern in the "layer_types"
field of config.json, e.g.:

"layer_types": [
    "full_attention",
    "full_attention",
    "full_attention",
    "skip_attention",
    "skip_attention",
    "skip_attention"
]

Differential Revision: D88399533
---
 examples/models/llama/llama_transformer.py | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/examples/models/llama/llama_transformer.py b/examples/models/llama/llama_transformer.py
index 6c1a5c05d66..00c3bfa1f47 100644
--- a/examples/models/llama/llama_transformer.py
+++ b/examples/models/llama/llama_transformer.py
@@ -272,6 +272,13 @@ def construct_transformer(model_args: ModelArgs) -> Transformer:
                 norm_eps=model_args.norm_eps,
             )
         )
+        elif (
+            model_args.layer_types
+            and model_args.layer_types[layer_id] == "skip_attention"
+        ):
+            attention = AttentionSkip()
+            transformer_block = TransformerBlock(model_args, attention)
+            layers.append(transformer_block)
         else:
             attention = cls(
                 model_args, layer_id, rope, **model_args.attention_kwargs
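
Note: the hunk above references AttentionSkip without showing its definition. The sketch below is only a rough illustration of what a no-op attention module with that name might look like; the forward signature, the pass-through behavior, and the selection loop are assumptions for illustration, not the actual implementation in llama_transformer.py.

import torch
import torch.nn as nn


class AttentionSkip(nn.Module):
    """Hypothetical no-op attention: passes hidden states through unchanged."""

    def forward(self, x: torch.Tensor, *args, **kwargs) -> torch.Tensor:
        # Accept and ignore the arguments a real attention layer would take
        # (rotary frequencies, masks, KV-cache handles) so this module can be
        # dropped in wherever full attention is expected.
        return x


# Selection logic mirroring the patched construct_transformer loop, using the
# "layer_types" pattern from the summary above.
layer_types = [
    "full_attention", "full_attention", "full_attention",
    "skip_attention", "skip_attention", "skip_attention",
]
for layer_id, layer_type in enumerate(layer_types):
    if layer_type == "skip_attention":
        attention = AttentionSkip()  # skip the attention computation entirely
    # else: construct the regular attention class for this layer, as before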