TransformerLensOrg · RishabSA · Jan 5, 2026
diff --git a/demos/LLaMA2_GPU_Quantized.ipynb b/demos/LLaMA2_GPU_Quantized.ipynb
@@ -406,7 +406,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 7,
+   "execution_count": null,
    "metadata": {
     "colab": {
      "base_uri": "https://localhost:8080/",
@@ -724,7 +724,7 @@
     "# inference_dtype = torch.float16\n",
     "\n",
     "hf_model = AutoModelForCausalLM.from_pretrained(LLAMA_2_7B_CHAT_PATH,\n",
-    "                                             torch_dtype=inference_dtype,\n",
+    "                                             dtype=inference_dtype,\n",
     "                                             device_map = \"cuda:0\",\n",
     "                                             quantization_config=BitsAndBytesConfig(load_in_4bit=True))\n",
     "\n",

diff --git a/demos/LLaVA.ipynb b/demos/LLaVA.ipynb
diff --git a/demos/stable_lm.ipynb b/demos/stable_lm.ipynb
@@ -74,7 +74,7 @@
     },
     {
       "cell_type": "code",
-      "execution_count": 3,
+      "execution_count": null,
       "metadata": {
         "colab": {
           "base_uri": "https://localhost:8080/",

diff --git a/tests/acceptance/test_hooked_encoder.py b/tests/acceptance/test_hooked_encoder.py
@@ -173,7 +173,7 @@ def test_from_pretrained_revision():
 @pytest.mark.parametrize("dtype", [torch.bfloat16, torch.float16])
 def test_half_precision(dtype):
     """Check the 16 bits loading and inferences."""
-    model = HookedEncoder.from_pretrained(MODEL_NAME, torch_dtype=dtype)
+    model = HookedEncoder.from_pretrained(MODEL_NAME, dtype=dtype)
     assert model.W_K.dtype == dtype
 
     _ = model(model.tokenizer("Hello, world", return_tensors="pt")["input_ids"])

diff --git a/tests/acceptance/test_hooked_transformer.py b/tests/acceptance/test_hooked_transformer.py
@@ -342,7 +342,7 @@ def benchmark_model_options(
 
     if hf_model is None:
         hf_model = AutoModelForCausalLM.from_pretrained(
-            model_name, torch_dtype=dtype, device_map="auto"
+            model_name, dtype=dtype, device_map="auto"
         )
     if tokenizer is None:
         tokenizer = AutoTokenizer.from_pretrained(model_name)
@@ -492,13 +492,13 @@ def check_dtype(dtype, margin, no_processing=False):
     for model_path in ["gpt2", "roneneldan/TinyStories-33M", "EleutherAI/pythia-70m"]:
         if no_processing:
             # For low precision, the processing is not advised.
-            model = HookedTransformer.from_pretrained_no_processing(model_path, torch_dtype=dtype)
+            model = HookedTransformer.from_pretrained_no_processing(model_path, dtype=dtype)
         else:
-            model = HookedTransformer.from_pretrained(model_path, torch_dtype=dtype)
+            model = HookedTransformer.from_pretrained(model_path, dtype=dtype)
 
         hf_model = AutoModelForCausalLM.from_pretrained(
             model_path,
-            torch_dtype=dtype,
+            dtype=dtype,
         ).to("cuda" if torch.cuda.is_available() else "cpu")
 
         for layer_name, layer in model.state_dict().items():

diff --git a/transformer_lens/loading_from_pretrained.py b/transformer_lens/loading_from_pretrained.py
@@ -1903,15 +1903,15 @@ def get_pretrained_state_dict(
                 hf_model = AutoModelForCausalLM.from_pretrained(
                     official_model_name,
                     revision=f"checkpoint-{cfg.checkpoint_value}",
-                    torch_dtype=dtype,
+                    dtype=dtype,
                     token=huggingface_token if len(huggingface_token) > 0 else None,
                     **kwargs,
                 )
             elif official_model_name.startswith("EleutherAI/pythia"):
                 hf_model = AutoModelForCausalLM.from_pretrained(
                     official_model_name,
                     revision=f"step{cfg.checkpoint_value}",
-                    torch_dtype=dtype,
+                    dtype=dtype,
                     token=huggingface_token,
                     **kwargs,
                 )
@@ -1924,21 +1924,21 @@ def get_pretrained_state_dict(
             elif "bert" in official_model_name:
                 hf_model = BertForPreTraining.from_pretrained(
                     official_model_name,
-                    torch_dtype=dtype,
+                    dtype=dtype,
                     token=huggingface_token if len(huggingface_token) > 0 else None,
                     **kwargs,
                 )
             elif "t5" in official_model_name:
                 hf_model = T5ForConditionalGeneration.from_pretrained(
                     official_model_name,
-                    torch_dtype=dtype,
+                    dtype=dtype,
                     token=huggingface_token if len(huggingface_token) > 0 else None,
                     **kwargs,
                 )
             else:
                 hf_model = AutoModelForCausalLM.from_pretrained(
                     official_model_name,
-                    torch_dtype=dtype,
+                    dtype=dtype,
                     token=huggingface_token if len(huggingface_token) > 0 else None,
                     **kwargs,
                 )