
Commit f261fb3

committed
add llm service
1 parent 79349ff commit f261fb3

File tree

1 file changed: +51 −0 lines changed


app/services/llm.py

Lines changed: 51 additions & 0 deletions
@@ -0,0 +1,51 @@
import httpx
import orjson
from typing import AsyncGenerator, Optional


class StreamLLMService:
    def __init__(self, base_url: str = "http://localhost:11434/v1"):
        self.base_url = base_url
        self.model = "llama3.2"

    async def stream_chat(self, prompt: str) -> AsyncGenerator[bytes, None]:
        """Stream chat completion responses from the LLM as newline-delimited JSON."""
        # Echo the user message first so the client sees it in the stream
        user_msg = {
            "role": "user",
            "content": prompt,
        }
        yield orjson.dumps(user_msg) + b"\n"

        # Open the client as a context manager and stream the SSE response
        async with httpx.AsyncClient(base_url=self.base_url) as client:
            async with client.stream(
                "POST",
                "/chat/completions",
                json={
                    "model": self.model,
                    "messages": [{"role": "user", "content": prompt}],
                    "stream": True,
                },
                timeout=60.0,
            ) as response:
                async for line in response.aiter_lines():
                    print(line)  # Debug output of the raw SSE line
                    # Each "data: " line carries one chunk of the completion
                    if line.startswith("data: ") and line != "data: [DONE]":
                        try:
                            json_line = line[6:]  # Remove the "data: " prefix
                            data = orjson.loads(json_line)
                            content = data.get("choices", [{}])[0].get("delta", {}).get("content", "")
                            if content:
                                model_msg = {
                                    "role": "model",
                                    "content": content,
                                }
                                yield orjson.dumps(model_msg) + b"\n"
                        except Exception:
                            # Skip lines that are not valid JSON chunks
                            pass


# FastAPI dependency
def get_llm_service(base_url: Optional[str] = None) -> StreamLLMService:
    # Fall back to the default base URL when none is provided
    return StreamLLMService(base_url=base_url) if base_url else StreamLLMService()
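Since get_llm_service is written as a FastAPI dependency, here is a minimal sketch of how the service could be wired into a streaming endpoint. The app setup and the /chat route are assumptions for illustration only and are not part of this commit.

from fastapi import Depends, FastAPI
from fastapi.responses import StreamingResponse

from app.services.llm import StreamLLMService, get_llm_service

app = FastAPI()


@app.post("/chat")
async def chat(prompt: str, llm: StreamLLMService = Depends(get_llm_service)):
    # stream_chat yields newline-delimited JSON bytes; StreamingResponse forwards them as they arrive
    return StreamingResponse(llm.stream_chat(prompt), media_type="application/x-ndjson")

A client can then read the response line by line, parsing each line as a JSON message whose role is either "user" (the echoed prompt) or "model" (a content delta).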
