Skip to content

Commit 1e9e0c7

Browse files
committed
Merge remote-tracking branch 'origin/main' into cett3
2 parents 59904f1 + d746a00 commit 1e9e0c7

File tree

5 files changed

+221
-86
lines changed

5 files changed

+221
-86
lines changed
Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
+{
+  "_name_or_path": "SparseLLM/ReluLLaMA-7B",
+  "sparsity": 0.3,
+  "architectures": ["LlamaSkipConnectionForCausalLM"],
+  "bos_token_id": 1,
+  "eos_token_id": 2,
+  "hidden_act": "relu",
+  "hidden_size": 4096,
+  "initializer_range": 0.02,
+  "intermediate_size": 11008,
+  "max_length": 4096,
+  "max_position_embeddings": 2048,
+  "model_type": "llama-skip",
+  "num_attention_heads": 32,
+  "num_hidden_layers": 32,
+  "num_key_value_heads": 32,
+  "pad_token_id": 0,
+  "pretraining_tp": 1,
+  "rms_norm_eps": 1e-05,
+  "rope_scaling": null,
+  "tie_word_embeddings": false,
+  "torch_dtype": "float32",
+  "transformers_version": "4.31.0",
+  "use_cache": true,
+  "vocab_size": 32000
+}

sparse_transformers/csrc/sparse_mlp_op.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -148,7 +148,7 @@ torch::Tensor sparse_mlp_forward_cpu(
     auto gate_proj = combined_proj_block.narrow(1, 0, gate_size);         // [block_size, gate_size]
     auto up_proj = combined_proj_block.narrow(1, gate_size, gate_size);   // [block_size, gate_size]

-    gate_proj.sigmoid_(); // In-place sigmoid
+    gate_proj.relu_(); // In-place relu
     gate_proj.mul_(up_proj); // In-place element-wise multiplication

     // Final projection to output dimension

src/models/llama/modelling_llama_skip.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -61,7 +61,7 @@ def _set_mlp_inference(self, config, layer_idx):
             config.intermediate_size,
             config.sparsity,
             config.mlp_bias,
-            "silu"
+            config.hidden_act
         )

0 commit comments

Comments
 (0)