Skip to content

Commit c8ff69e

Browse files
committed
refactor llm service
1 parent f261fb3 commit c8ff69e

File tree

1 file changed

+25
-31
lines changed

1 file changed

+25
-31
lines changed

app/services/llm.py

Lines changed: 25 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -10,42 +10,36 @@ def __init__(self, base_url: str = "http://localhost:11434/v1"):
1010

1111
async def stream_chat(self, prompt: str) -> AsyncGenerator[bytes, None]:
    """Stream chat completions from the LLM as newline-delimited JSON messages.

    The user's message is echoed back first, then one message is yielded per
    content delta received from the OpenAI-compatible ``/chat/completions``
    SSE endpoint at ``self.base_url``.

    Args:
        prompt: The user prompt to send to the model.

    Yields:
        orjson-encoded ``{"role": ..., "content": ...}`` dicts, each
        terminated with ``b"\\n"``.
    """
    # Echo the user's message first so clients can render it immediately.
    yield orjson.dumps({"role": "user", "content": prompt}) + b"\n"

    async with httpx.AsyncClient(base_url=self.base_url) as client:
        request_data = {
            "model": self.model,
            "messages": [{"role": "user", "content": prompt}],
            "stream": True,
        }

        async with client.stream(
            "POST", "/chat/completions", json=request_data, timeout=60.0
        ) as response:
            async for line in response.aiter_lines():
                # Only SSE data frames matter; skip keep-alives, blank
                # lines, and the terminal "data: [DONE]" sentinel.
                if not line.startswith("data: ") or line == "data: [DONE]":
                    continue
                try:
                    data = orjson.loads(line[6:])  # strip "data: " prefix
                except orjson.JSONDecodeError:
                    # Best-effort: drop malformed frames instead of
                    # aborting the whole stream, but don't swallow
                    # unrelated errors (the old bare `except` did).
                    continue
                # `data.get("choices", [{}])` does NOT guard against an
                # explicit empty list — `"choices": []` would raise
                # IndexError — so fall back with `or`.
                choices = data.get("choices") or [{}]
                content = choices[0].get("delta", {}).get("content", "")
                if content:
                    yield orjson.dumps({"role": "model", "content": content}) + b"\n"
4742

4843

49-
# FastAPI dependency
5044
def get_llm_service(base_url: Optional[str] = None) -> StreamLLMService:
    """FastAPI dependency that builds a streaming LLM service.

    Args:
        base_url: Optional override for the LLM server's base URL. When
            ``None``, ``StreamLLMService``'s own default URL is used.

    Returns:
        A configured ``StreamLLMService`` instance.
    """
    if base_url is None:
        # Let the class apply its default base URL; forwarding an explicit
        # None would override the default and break the httpx client.
        return StreamLLMService()
    return StreamLLMService(base_url=base_url)

0 commit comments

Comments
 (0)