@@ -64,6 +64,7 @@ class PossibleSystemPromptException(Exception):
6464def inference (latest_message , history ):
6565 # Allow mutating global variable
6666 global BACKEND_INITIALISED
67+ log .debug ("Inference request received with history: %s" , history )
6768
6869 try :
6970 context = []
@@ -86,21 +87,27 @@ def inference(latest_message, history):
8687
8788 log .debug ("Chat context: %s" , context )
8889
89-
9090 response = ""
91+ thinking = False
92+
9193 for chunk in llm .stream (context ):
9294 # If this is our first successful response from the backend
9395 # then update the status variable to allow future error messages
9496 # to be more informative
9597 if not BACKEND_INITIALISED and len (response ) > 0 :
9698 BACKEND_INITIALISED = True
9799
98- # NOTE(sd109): For some reason the '>' character breaks the UI
99- # so we need to escape it here.
100- # response += chunk.content.replace('>', '\>')
101- # UPDATE(sd109): Above bug seems to have been fixed as of gradio 4.15.0
102- # but keeping this note here in case we encounter it again
103- response += chunk .content
100+ # The "think" tags mark the chatbot's reasoning. Remove the content
101+ # and replace with "Thinking..." until the closing tag is found.
102+ content = chunk .content
103+ if '<think>' in content or thinking :
104+ thinking = True
105+ response = "Thinking..."
106+ if '</think>' in content :
107+ thinking = False
108+ response = ""
109+ else :
110+ response += content
104111 yield response
105112
106113 # Handle any API errors here. See OpenAI Python client for possible error responses
@@ -171,7 +178,12 @@ def inference_wrapper(*args):
171178 inference_wrapper ,
172179 type = "messages" ,
173180 analytics_enabled = False ,
174- chatbot = gr .Chatbot (show_copy_button = True ),
181+ chatbot = gr .Chatbot (
182+ show_copy_button = True ,
183+ height = "75vh" ,
184+ resizable = True ,
185+ sanitize_html = True ,
186+ ),
175187 )
176188
177189
0 commit comments