From fae847a43745e49b9f926373aa6120f5a5ff7803 Mon Sep 17 00:00:00 2001
From: GangGreenTemperTatum <104169244+GangGreenTemperTatum@users.noreply.github.com>
Date: Wed, 7 Jan 2026 12:46:24 -0500
Subject: [PATCH 1/3] chore: add custom system prompt capability to judge

---
 dreadnode/scorers/judge.py | 13 ++++++++++++-
 1 file changed, 12 insertions(+), 1 deletion(-)

diff --git a/dreadnode/scorers/judge.py b/dreadnode/scorers/judge.py
index df5bab28..46e0ac06 100644
--- a/dreadnode/scorers/judge.py
+++ b/dreadnode/scorers/judge.py
@@ -46,6 +46,7 @@ def llm_judge(
     min_score: float | None = None,
     max_score: float | None = None,
     name: str = "llm_judge",
+    system_prompt: str | None = None,
 ) -> "Scorer[t.Any]":
     """
     Score the output of a task using an LLM to judge it against a rubric.
@@ -60,6 +61,7 @@
         min_score: Optional minimum score for the judgement - if provided, the score will be clamped to this value.
         max_score: Optional maximum score for the judgement - if provided, the score will be clamped to this value.
         name: The name of the scorer.
+        system_prompt: Optional custom system prompt for the judge. If None, uses default.
     """
 
     async def evaluate(
@@ -97,7 +99,16 @@ async def evaluate(
             rubric=rubric,
         )
 
-        judgement = await judge.bind(generator)(input_data)
+        if system_prompt:
+            completion = (
+                await generator.chat(system_prompt)
+                .add(input_data.model_dump_json())
+                .until_parsed_as(Judgement)
+                .run()
+            )
+            judgement = completion.last.parse(Judgement)
+        else:
+            judgement = await judge.bind(generator)(input_data)
 
         if min_score is not None:
             judgement.score = max(min_score, judgement.score)

From 066ad786f3c644124393501baea81d0a878844bf Mon Sep 17 00:00:00 2001
From: monoxgas
Date: Mon, 12 Jan 2026 12:57:30 -0700
Subject: [PATCH 2/3] Fixing system prompt for judge

---
 dreadnode/scorers/judge.py | 16 ++++++----------
 1 file changed, 6 insertions(+), 10 deletions(-)

diff --git a/dreadnode/scorers/judge.py b/dreadnode/scorers/judge.py
index 46e0ac06..2815091a 100644
--- a/dreadnode/scorers/judge.py
+++ b/dreadnode/scorers/judge.py
@@ -76,6 +76,7 @@ async def evaluate(
         model_params: rg.GenerateParams | AnyDict | None = model_params,
         min_score: float | None = min_score,
         max_score: float | None = max_score,
+        system_prompt: str | None = system_prompt,
     ) -> list[Metric]:
         generator: rg.Generator
         if isinstance(model, str):
@@ -99,16 +100,11 @@ async def evaluate(
             rubric=rubric,
         )
 
-        if system_prompt:
-            completion = (
-                await generator.chat(system_prompt)
-                .add(input_data.model_dump_json())
-                .until_parsed_as(Judgement)
-                .run()
-            )
-            judgement = completion.last.parse(Judgement)
-        else:
-            judgement = await judge.bind(generator)(input_data)
+        judgement = await judge.bind(
+            generator.chat({"role": "system", "content": system_prompt})
+            if system_prompt
+            else generator
+        )(input_data)
 
         if min_score is not None:
             judgement.score = max(min_score, judgement.score)
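For context on the 2/3 fix before the final patch lands: patch 1/3 passed the
prompt to `generator.chat()` as a bare string, while patch 2/3 pins it to the
system role explicitly. A minimal sketch of the distinction, assuming
rigging's `chat()` treats a bare string as user content (an assumption about
the rigging API, not verified here; the model id is an illustrative
placeholder):

    import rigging as rg

    generator = rg.get_generator("gpt-4o-mini")  # illustrative model id
    system_prompt = "You are a strict judge."

    # As in PATCH 1/3: under the assumed API, a bare string lands in the
    # conversation as a *user* message, so the model never sees a system prompt.
    pipeline_v1 = generator.chat(system_prompt)

    # As in PATCH 2/3: an explicit role/content dict pins the message to the
    # system role, which is what the judge model should receive.
    pipeline_v2 = generator.chat({"role": "system", "content": system_prompt})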
From c76f63d17eec5ec036158534d808e6695a822f3e Mon Sep 17 00:00:00 2001
From: Raja Sekhar Rao Dheekonda
Date: Mon, 12 Jan 2026 12:38:38 -0800
Subject: [PATCH 3/3] leverage inject system content util function in judge module

---
 dreadnode/scorers/judge.py | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/dreadnode/scorers/judge.py b/dreadnode/scorers/judge.py
index 2815091a..a323752f 100644
--- a/dreadnode/scorers/judge.py
+++ b/dreadnode/scorers/judge.py
@@ -100,11 +100,10 @@ async def evaluate(
             rubric=rubric,
         )
 
-        judgement = await judge.bind(
-            generator.chat({"role": "system", "content": system_prompt})
-            if system_prompt
-            else generator
-        )(input_data)
+        pipeline = generator.chat([])
+        if system_prompt:
+            pipeline.chat.inject_system_content(system_prompt)
+        judgement = await judge.bind(pipeline)(input_data)
 
         if min_score is not None:
             judgement.score = max(min_score, judgement.score)
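Taken together, the series leaves `llm_judge` with an optional `system_prompt`
that is injected through rigging's `inject_system_content` utility. A minimal
usage sketch, assuming the full `llm_judge` signature matches the fragments
visible in these diffs; the model id and rubric text are illustrative
placeholders, not from the patches:

    from dreadnode.scorers.judge import llm_judge

    scorer = llm_judge(
        model="gpt-4o-mini",  # assumed: any rigging-resolvable model id
        rubric="Award 1.0 if the answer cites a primary source, else 0.0.",
        min_score=0.0,
        max_score=1.0,
        system_prompt="You are a strict fact-checking judge.",
    )
    # With system_prompt=None, the pipeline stays generator.chat([]) and the
    # judge's default prompting applies (per PATCH 3/3).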