4 files changed, +21 -20 lines changed
package.json
@@ -10,7 +10,7 @@
   "preview": "vite preview"
 },
 "dependencies": {
-  "@huggingface/transformers": "3.7.1",
+  "@huggingface/transformers": "3.8.0",
   "dompurify": "^3.1.2",
   "marked": "^12.0.2",
   "react": "^18.3.1",
App component
@@ -209,15 +209,15 @@ function App() {
             <br />
             You are about to load{" "}
             <a
-              href="https://huggingface.co/onnx-community/Llama-3.2-1B-Instruct-q4f16"
+              href="https://huggingface.co/onnx-community/Llama-3.2-1B-Instruct-ONNX"
               target="_blank"
               rel="noreferrer"
               className="font-medium underline"
             >
               Llama-3.2-1B-Instruct
             </a>
             , a 1.24 billion parameter LLM that is optimized for inference on
-            the web. Once downloaded, the model (1.15 GB) will be cached
+            the web. Once downloaded, the model (1.01 GB) will be cached
             and reused when you revisit the page.
             <br />
             <br />
Text-generation worker
@@ -9,7 +9,7 @@
  * This class uses the Singleton pattern to enable lazy-loading of the pipeline
  */
 class TextGenerationPipeline {
-  static model_id = "onnx-community/Llama-3.2-1B-Instruct-q4f16";
+  static model_id = "onnx-community/Llama-3.2-1B-Instruct-ONNX";
 
   static async getInstance(progress_callback = null) {
     this.tokenizer ??= AutoTokenizer.from_pretrained(this.model_id, {
@@ -69,8 +69,7 @@ async function generate(messages) {
 
   const { past_key_values, sequences } = await model.generate({
     ...inputs,
-    // TODO: Add when model is fixed
-    // past_key_values: past_key_values_cache,
+    past_key_values: past_key_values_cache,
 
     // Sampling
     do_sample: false,
@@ -80,7 +79,7 @@ async function generate(messages) {
     stopping_criteria,
     return_dict_in_generate: true,
   });
-  // past_key_values_cache = past_key_values;
+  past_key_values_cache = past_key_values;
 
   const decoded = tokenizer.batch_decode(sequences, {
     skip_special_tokens: true,
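Taken together, the two worker hunks restore key-value-cache reuse across conversation turns, which the TODO had left disabled: the cache returned by one generate call is fed into the next, so the model does not re-process earlier turns. A minimal sketch of the pattern; the tokenization call, the max_new_tokens limit, and the load options are illustrative assumptions, since the diff only shows fragments of the worker:

// Sketch of cross-turn KV-cache reuse, as re-enabled by this diff.
// Everything outside the two restored lines is an assumption about the
// surrounding worker code, simplified for illustration.
import { AutoTokenizer, AutoModelForCausalLM } from "@huggingface/transformers";

const model_id = "onnx-community/Llama-3.2-1B-Instruct-ONNX";
const tokenizer = await AutoTokenizer.from_pretrained(model_id);
const model = await AutoModelForCausalLM.from_pretrained(model_id, {
  dtype: "q4f16", // assumed, as in the earlier sketch
  device: "webgpu",
});

let past_key_values_cache = null;

async function generate(messages) {
  // Tokenize the full chat history; with a warm cache, only the tokens
  // appended since the previous turn actually need to be processed.
  const inputs = tokenizer.apply_chat_template(messages, {
    add_generation_prompt: true,
    return_dict: true,
  });

  const { past_key_values, sequences } = await model.generate({
    ...inputs,
    past_key_values: past_key_values_cache, // reuse the cache from the last turn
    do_sample: false,
    max_new_tokens: 1024, // assumed limit, not shown in this diff
    return_dict_in_generate: true,
  });
  past_key_values_cache = past_key_values; // keep the cache for the next turn

  return tokenizer.batch_decode(sequences, { skip_special_tokens: true });
}

The cache is only valid while the previous conversation is a prefix of the new input, so resetting past_key_values_cache to null is the safe fallback whenever the history is edited or cleared.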