@@ -10,42 +10,36 @@ def __init__(self, base_url: str = "http://localhost:11434/v1"):
 
     async def stream_chat(self, prompt: str) -> AsyncGenerator[bytes, None]:
         """Stream chat completion responses from LLM."""
-        # Send user message first
-        user_msg = {
-            'role': 'user',
-            'content': prompt,
-        }
-        yield orjson.dumps(user_msg) + b'\n'
+        # Send initial user message
+        yield orjson.dumps({"role": "user", "content": prompt}) + b"\n"
 
-        # Open client as context manager and stream responses
         async with httpx.AsyncClient(base_url=self.base_url) as client:
+            request_data = {
+                "model": self.model,
+                "messages": [{"role": "user", "content": prompt}],
+                "stream": True,
+            }
+
             async with client.stream(
-                "POST",
-                "/chat/completions",
-                json={
-                    "model": self.model,
-                    "messages": [{"role": "user", "content": prompt}],
-                    "stream": True
-                },
-                timeout=60.0
+                "POST", "/chat/completions", json=request_data, timeout=60.0
             ) as response:
                 async for line in response.aiter_lines():
-                    print(line)
-                    if line.startswith("data: ") and line != "data: [DONE]":
-                        try:
-                            json_line = line[6:]  # Remove "data: " prefix
-                            data = orjson.loads(json_line)
-                            content = data.get("choices", [{}])[0].get("delta", {}).get("content", "")
-                            if content:
-                                model_msg = {
-                                    'role': 'model',
-                                    'content': content
-                                }
-                                yield orjson.dumps(model_msg) + b'\n'
-                        except Exception:
-                            pass
+                    if not (line.startswith("data: ") and line != "data: [DONE]"):
+                        continue
+                    try:
+                        data = orjson.loads(line[6:])  # Skip "data: " prefix
+                        if (
+                            content := data.get("choices", [{}])[0]
+                            .get("delta", {})
+                            .get("content", "")
+                        ):
+                            yield (
+                                orjson.dumps({"role": "model", "content": content})
+                                + b"\n"
+                            )
+                    except Exception:
+                        pass
 
 
-# FastAPI dependency
 def get_llm_service(base_url: Optional[str] = None) -> StreamLLMService:
-    return StreamLLMService(base_url=base_url)
+    return StreamLLMService(base_url=base_url)
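
For context, a minimal sketch of how the refactored service might be consumed from a FastAPI route. The `/chat` path, the query-parameter `prompt`, and the `application/x-ndjson` media type are illustrative assumptions, not part of this change:

# Usage sketch (assumed, not part of this diff): wiring StreamLLMService
# into a FastAPI endpoint that streams the generator's output to the client.
from fastapi import Depends, FastAPI
from fastapi.responses import StreamingResponse

app = FastAPI()


@app.post("/chat")
async def chat(
    prompt: str,
    llm: StreamLLMService = Depends(get_llm_service),
) -> StreamingResponse:
    # stream_chat yields newline-delimited JSON chunks, so NDJSON is a
    # natural content type for the response.
    return StreamingResponse(
        llm.stream_chat(prompt), media_type="application/x-ndjson"
    )

Framing each chunk as newline-delimited JSON is what makes this pairing work: the client can parse messages incrementally with a simple line reader, which is why both the echoed user message and each model delta end in b"\n".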