@@ -347,6 +347,7 @@ def _convert_text_completion_chunks_to_chat(
                     "finish_reason": chunk["choices"][0]["finish_reason"],
                 }
             ],
+            "usage": chunk.get("usage") if "usage" in chunk else None,
         }


@@ -431,7 +432,7 @@ def _stream_response_to_function_stream(
                 created = chunk["created"]
                 model = chunk["model"]
                 tool_id = "call_" + "_0_" + tool_name + "_" + chunk["id"]
-                yield {
+                response = {
                     "id": id_,
                     "object": "chat.completion.chunk",
                     "created": created,
@@ -450,7 +451,11 @@ def _stream_response_to_function_stream(
                         }
                     ],
                 }
-                yield {
+                if "usage" in chunk:
+                    response["usage"] = chunk["usage"]
+                yield response
+
+                response = {
                     "id": "chat" + chunk["id"],
                     "object": "chat.completion.chunk",
                     "created": chunk["created"],
@@ -484,10 +489,14 @@ def _stream_response_to_function_stream(
                         }
                     ],
                 }
+                if "usage" in chunk:
+                    response["usage"] = chunk["usage"]
+                yield response
                 first = False
                 continue
+
             assert tool_id is not None
-            yield {
+            response = {
                 "id": "chat" + chunk["id"],
                 "object": "chat.completion.chunk",
                 "created": chunk["created"],
@@ -519,9 +528,12 @@ def _stream_response_to_function_stream(
                     }
                 ],
             }
+            if "usage" in chunk:
+                response["usage"] = chunk["usage"]
+            yield response

         if id_ is not None and created is not None and model is not None:
-            yield {
+            response = {
                 "id": id_,
                 "object": "chat.completion.chunk",
                 "created": created,
@@ -540,6 +552,9 @@ def _stream_response_to_function_stream(
                     }
                 ],
             }
+            if "usage" in chunk:
+                response["usage"] = chunk["usage"]
+            yield response

     return _stream_response_to_function_stream(chunks)

@@ -2120,6 +2135,7 @@ def generate_streaming(tools, functions, function_call, prompt):
                         },
                     }
                 ],
+                usage=chunk["usage"] if "usage" in chunk else None,
            )
            first = False
            if tools is not None:
@@ -2160,6 +2176,7 @@ def generate_streaming(tools, functions, function_call, prompt):
                             },
                         }
                     ],
+                    usage=chunk["usage"] if "usage" in chunk else None,
                )
                # Yield tool_call/function_call stop message
                yield llama_types.CreateChatCompletionStreamResponse(
@@ -2182,6 +2199,7 @@ def generate_streaming(tools, functions, function_call, prompt):
                             },
                         }
                     ],
+                    usage=chunk["usage"] if "usage" in chunk else None,
                )
        # If "auto" or no tool_choice/function_call
        elif isinstance(function_call, str) and function_call == "auto":
@@ -2217,6 +2235,7 @@ def generate_streaming(tools, functions, function_call, prompt):
                             "finish_reason": None,
                         }
                     ],
+                    usage=chunk["usage"] if "usage" in chunk else None,
                )
            else:
                prompt += f"{function_name}\n<|content|>"
@@ -2262,6 +2281,7 @@ def generate_streaming(tools, functions, function_call, prompt):
                             },
                         }
                     ],
+                    usage=chunk["usage"] if "usage" in chunk else None,
                )
            # Generate content
            stops = [RECIPIENT_TOKEN, STOP_TOKEN]
@@ -2299,6 +2319,7 @@ def generate_streaming(tools, functions, function_call, prompt):
                                 },
                             }
                         ],
+                        usage=chunk["usage"] if "usage" in chunk else None,
                    )
                    is_end = False
                elif chunk["choices"][0]["text"] == "\n":
@@ -2328,6 +2349,7 @@ def generate_streaming(tools, functions, function_call, prompt):
                                 },
                             }
                         ],
+                        usage=chunk["usage"] if "usage" in chunk else None,
                    )
                # Check whether the model wants to generate another turn
                if (
@@ -2360,6 +2382,7 @@ def generate_streaming(tools, functions, function_call, prompt):
                                 "finish_reason": "stop",
                             }
                         ],
+                        usage=chunk["usage"] if "usage" in chunk else None,
                    )
                    break
                else:
@@ -2409,6 +2432,7 @@ def generate_streaming(tools, functions, function_call, prompt):
                                 },
                             }
                         ],
+                        usage=chunk["usage"] if "usage" in chunk else None,
                    )
                prompt += completion_text.strip()
                grammar = None
@@ -2448,6 +2472,7 @@ def generate_streaming(tools, functions, function_call, prompt):
                             },
                         }
                     ],
+                    usage=chunk["usage"] if "usage" in chunk else None,
                )
                break
