@@ -356,6 +356,7 @@ def _convert_text_completion_chunks_to_chat(
                     "finish_reason": chunk["choices"][0]["finish_reason"],
                 }
             ],
+            "usage": chunk.get("usage") if "usage" in chunk else None,
         }
 
 
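One note on the expression this first hunk adds: `dict.get` already returns `None` for a missing key, so the guard is redundant and the value is always equivalent to a bare `chunk.get("usage")`. A quick illustration (standalone snippet, not part of the diff):

    chunk = {"id": "abc"}  # no "usage" key present
    guarded = chunk.get("usage") if "usage" in chunk else None
    # Both forms evaluate to None when the key is absent,
    # and to chunk["usage"] when it is present.
    assert guarded is None and chunk.get("usage") is None
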
@@ -440,7 +441,7 @@ def _stream_response_to_function_stream(
                 created = chunk["created"]
                 model = chunk["model"]
                 tool_id = "call_" + "_0_" + tool_name + "_" + chunk["id"]
-                yield {
+                response = {
                     "id": id_,
                     "object": "chat.completion.chunk",
                     "created": created,
@@ -459,7 +460,11 @@ def _stream_response_to_function_stream(
                         }
                     ],
                 }
-                yield {
+                if "usage" in chunk:
+                    response["usage"] = chunk["usage"]
+                yield response
+
+                response = {
                     "id": "chat" + chunk["id"],
                     "object": "chat.completion.chunk",
                     "created": chunk["created"],
@@ -493,10 +498,14 @@ def _stream_response_to_function_stream(
                         }
                     ],
                 }
+                if "usage" in chunk:
+                    response["usage"] = chunk["usage"]
+                yield response
                 first = False
                 continue
+
             assert tool_id is not None
-            yield {
+            response = {
                 "id": "chat" + chunk["id"],
                 "object": "chat.completion.chunk",
                 "created": chunk["created"],
@@ -528,9 +537,12 @@ def _stream_response_to_function_stream(
                     }
                 ],
             }
+            if "usage" in chunk:
+                response["usage"] = chunk["usage"]
+            yield response
 
         if id_ is not None and created is not None and model is not None:
-            yield {
+            response = {
                 "id": id_,
                 "object": "chat.completion.chunk",
                 "created": created,
@@ -549,6 +561,9 @@ def _stream_response_to_function_stream(
                     }
                 ],
             }
+            if "usage" in chunk:
+                response["usage"] = chunk["usage"]
+            yield response
 
     return _stream_response_to_function_stream(chunks)
 
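Every hunk above makes the same change to `_stream_response_to_function_stream`: the literal `yield {...}` becomes `response = {...}`, the source chunk's `usage` field is copied across when present, and only then is the dict yielded. A minimal sketch of that pattern (the helper name `_with_usage` is illustrative only; the diff inlines these two lines at each yield site):

    from typing import Any, Dict

    def _with_usage(response: Dict[str, Any], chunk: Dict[str, Any]) -> Dict[str, Any]:
        # Copy token accounting over only when the underlying completion
        # chunk actually carried a "usage" field (typically the final chunk).
        if "usage" in chunk:
            response["usage"] = chunk["usage"]
        return response

Building the dict before yielding lets `usage` stay absent from intermediate chunks, rather than forcing a `"usage": None` key into every streamed message.
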
@@ -2129,6 +2144,7 @@ def generate_streaming(tools, functions, function_call, prompt):
                                 },
                             }
                         ],
+                        usage=chunk["usage"] if "usage" in chunk else None,
                     )
                     first = False
                 if tools is not None:
@@ -2169,6 +2185,7 @@ def generate_streaming(tools, functions, function_call, prompt):
                                 },
                             }
                         ],
+                        usage=chunk["usage"] if "usage" in chunk else None,
                     )
             # Yield tool_call/function_call stop message
             yield llama_types.CreateChatCompletionStreamResponse(
@@ -2191,6 +2208,7 @@ def generate_streaming(tools, functions, function_call, prompt):
                         },
                     }
                 ],
+                usage=chunk["usage"] if "usage" in chunk else None,
             )
         # If "auto" or no tool_choice/function_call
         elif isinstance(function_call, str) and function_call == "auto":
@@ -2226,6 +2244,7 @@ def generate_streaming(tools, functions, function_call, prompt):
                                 "finish_reason": None,
                             }
                         ],
+                        usage=chunk["usage"] if "usage" in chunk else None,
                     )
             else:
                 prompt += f"{function_name}\n<|content|>"
@@ -2271,6 +2290,7 @@ def generate_streaming(tools, functions, function_call, prompt):
                             },
                         }
                     ],
+                    usage=chunk["usage"] if "usage" in chunk else None,
                )
            # Generate content
            stops = [RECIPIENT_TOKEN, STOP_TOKEN]
@@ -2308,6 +2328,7 @@ def generate_streaming(tools, functions, function_call, prompt):
                                 },
                             }
                         ],
+                        usage=chunk["usage"] if "usage" in chunk else None,
                     )
                     is_end = False
                 elif chunk["choices"][0]["text"] == "\n":
@@ -2337,6 +2358,7 @@ def generate_streaming(tools, functions, function_call, prompt):
                             },
                         }
                     ],
+                    usage=chunk["usage"] if "usage" in chunk else None,
                 )
             # Check whether the model wants to generate another turn
             if (
@@ -2369,6 +2391,7 @@ def generate_streaming(tools, functions, function_call, prompt):
                             "finish_reason": "stop",
                         }
                     ],
+                    usage=chunk["usage"] if "usage" in chunk else None,
                 )
                 break
             else:
@@ -2418,6 +2441,7 @@ def generate_streaming(tools, functions, function_call, prompt):
                             },
                         }
                     ],
+                    usage=chunk["usage"] if "usage" in chunk else None,
                 )
                 prompt += completion_text.strip()
                 grammar = None
@@ -2457,6 +2481,7 @@ def generate_streaming(tools, functions, function_call, prompt):
                             },
                         }
                     ],
+                    usage=chunk["usage"] if "usage" in chunk else None,
                 )
                 break
 
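Taken together, the streaming chat paths now forward whatever `usage` the underlying text completion chunks report instead of silently dropping it. A sketch of how a consumer might read it (the model path and messages are placeholders, and whether `usage` is actually populated still depends on the underlying completion stream):

    from llama_cpp import Llama

    llm = Llama(model_path="model.gguf")  # placeholder path
    messages = [{"role": "user", "content": "Hello"}]
    for chunk in llm.create_chat_completion(messages=messages, stream=True):
        usage = chunk.get("usage")
        if usage:  # in practice only some chunks carry token counts
            print(usage["prompt_tokens"], usage["completion_tokens"], usage["total_tokens"])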