From 72fd468ea900554c1432db29eb5322e51bcc55c9 Mon Sep 17 00:00:00 2001
From: Hanxian Huang
Date: Fri, 5 Dec 2025 11:54:25 -0800
Subject: [PATCH] support skip atten in export (#16104)

Summary:
Support export for llama model variants with attention layer skipping.
We only need to specify the attention skip pattern in the "layer_types"
field of config.json, e.g.:

"layer_types": [
    "full_attention",
    "full_attention",
    "full_attention",
    "skip_attention",
    "skip_attention",
    "skip_attention"
]

Differential Revision: D88399533
---
 examples/models/llama/llama_transformer.py | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/examples/models/llama/llama_transformer.py b/examples/models/llama/llama_transformer.py
index 6c1a5c05d66..00c3bfa1f47 100644
--- a/examples/models/llama/llama_transformer.py
+++ b/examples/models/llama/llama_transformer.py
@@ -272,6 +272,13 @@ def construct_transformer(model_args: ModelArgs) -> Transformer:
                 norm_eps=model_args.norm_eps,
             )
         )
+        elif (
+            model_args.layer_types
+            and model_args.layer_types[layer_id] == "skip_attention"
+        ):
+            attention = AttentionSkip()
+            transformer_block = TransformerBlock(model_args, attention)
+            layers.append(transformer_block)
         else:
             attention = cls(
                 model_args, layer_id, rope, **model_args.attention_kwargs
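
Note: the hunk above references AttentionSkip without showing its definition. The sketch below is only a rough illustration of what a no-op attention module with that name might look like; the forward signature, the pass-through behavior, and the selection loop are assumptions for illustration, not the actual implementation in llama_transformer.py.

import torch
import torch.nn as nn


class AttentionSkip(nn.Module):
    """Hypothetical no-op attention: passes hidden states through unchanged."""

    def forward(self, x: torch.Tensor, *args, **kwargs) -> torch.Tensor:
        # Accept and ignore the arguments a real attention layer would take
        # (rotary frequencies, masks, KV-cache handles) so this module can be
        # dropped in wherever full attention is expected.
        return x


# Selection logic mirroring the patched construct_transformer loop, using the
# "layer_types" pattern from the summary above.
layer_types = [
    "full_attention", "full_attention", "full_attention",
    "skip_attention", "skip_attention", "skip_attention",
]
for layer_id, layer_type in enumerate(layer_types):
    if layer_type == "skip_attention":
        attention = AttentionSkip()  # skip the attention computation entirely
    # else: construct the regular attention class for this layer, as before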