Fix reasoning model generation output

Alex-Welsh · Alex-Welsh · commit 335b7d1063ff · 2025-03-03T10:59:44.000Z
diff --git a/web-apps/chat/app.py b/web-apps/chat/app.py
@@ -64,6 +64,7 @@ class PossibleSystemPromptException(Exception):
 def inference(latest_message, history):
     # Allow mutating global variable
     global BACKEND_INITIALISED
+    log.debug("Inference request received with history: %s", history)
 
     try:
         context = []
@@ -86,21 +87,27 @@ def inference(latest_message, history):
 
         log.debug("Chat context: %s", context)
 
-
         response = ""
+        thinking = False
+
         for chunk in llm.stream(context):
             # If this is our first successful response from the backend
             # then update the status variable to allow future error messages
             # to be more informative
             if not BACKEND_INITIALISED and len(response) > 0:
                 BACKEND_INITIALISED = True
 
-            # NOTE(sd109): For some reason the '>' character breaks the UI
-            # so we need to escape it here.
-            # response += chunk.content.replace('>', '\>')
-            # UPDATE(sd109): Above bug seems to have been fixed as of gradio 4.15.0
-            # but keeping this note here incase we enounter it again
-            response += chunk.content
+            # Hide the model's <think>...</think> reasoning: show "Thinking..." until
+            # the closing tag arrives, then reset the response to start the answer.
+            content = chunk.content
+            if '<think>' in content or thinking:
+                thinking = True
+                response = "Thinking..."
+                if '</think>' in content:
+                    thinking = False
+                    response = ""
+            else:
+                response += content
             yield response
 
     # Handle any API errors here. See OpenAI Python client for possible error responses
@@ -171,7 +178,12 @@ def inference_wrapper(*args):
         inference_wrapper,
         type="messages",
         analytics_enabled=False,
-        chatbot=gr.Chatbot(show_copy_button=True),
+        chatbot=gr.Chatbot(
+            show_copy_button=True,
+            height="75vh",
+            resizable=True,
+            sanitize_html=True,
+            ),
     )