
Commit 1e4c444

Evaluate (#51)
* Fix several small bugs in evaluate, rename evaluate to avoid a circular import issue, add LM eval to requirements, and set evaluate to use the 4% lora size checkpoints for the time being.

Signed-off-by: Kira Selby <kaselby@uwaterloo.ca>

* add flags for lora size

Signed-off-by: Kira Selby <kaselby@uwaterloo.ca>

---------

Signed-off-by: Kira Selby <kaselby@uwaterloo.ca>
1 parent d2252aa commit 1e4c444

File tree

3 files changed: +15 -9 lines changed


evaluate.py renamed to downstream_eval.py

Lines changed: 11 additions & 7 deletions
@@ -9,6 +9,7 @@
 from lm_eval.utils import make_table
 from lm_eval.models.huggingface import HFLM
 
+import src.models
 
 # Setup logging
 logging.basicConfig(level=logging.INFO)
@@ -27,7 +28,9 @@ def parse_args():
                         help="Batch size for processing")
     parser.add_argument("--device", type=str, default="auto",
                         help="Device to use (auto, cpu, cuda)")
-    return parser
+    parser.add_argument("--lora_size", type=float, default=4.0,
+                        help="Size of lora predictors to use as percentage of total hidden size")
+    return parser.parse_args()
 
 
 def main():
@@ -43,13 +46,15 @@ def main():
 
     # Load pretrained model
     logging.info("Loading pretrained model for evaluation...")
-    config = AutoConfig.from_pretrained(args.model_name_or_config)
+
     if args.model_type == "hf":
-        model = AutoModelForCausalLM.from_pretrained(config)
+        model = AutoModelForCausalLM.from_pretrained(args.model_name_or_config)
     if args.model_type == "sparse":
+        config = AutoConfig.from_pretrained(args.model_name_or_config)
+        config.lora_size = args.lora_size / 100.0
         model = AutoModelForCausalLM.from_pretrained(config._name_or_path, config=config)
         for layer_idx, layer in enumerate(model.get_decoder().layers):
-            layer_path = os.path.join(args.sp_dir, f"final_predictor_layer_{layer_idx}")
+            layer_path = os.path.join(args.sp_dir, f"final_predictor_layer_{layer_idx}_lora_{args.lora_size}pct.pt")
             if not os.path.exists(layer_path):
                 logger.error(f"Pretrained weights for sparse predictor at layer {layer_idx} do not exist.")
                 return
@@ -60,9 +65,8 @@ def main():
 
     wrapped_model = HFLM(
         pretrained=model,
-        backend="causal",
         batch_size=args.batch_size,
-        device=device,
+        device=device
     )
 
     logging.info("Beginning evaluation...")
@@ -79,4 +83,4 @@ def main():
     print(make_table(results, "groups"))
 
 if __name__ == '__main__':
-    main()
+    main()
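For reference, a small sketch of how the new --lora_size flag propagates, using only values shown in the hunks above: the CLI value is a percentage, the config stores it as a fraction, and the per-layer checkpoint filename embeds the raw percentage.

# Illustrative only: how --lora_size (a percentage) maps onto the config value
# and the per-layer checkpoint filename used above.
args_lora_size = 4.0                            # CLI default, percent of hidden size
config_lora_size = args_lora_size / 100.0       # 0.04, assigned to config.lora_size
layer_idx = 0                                   # example layer index
ckpt_name = f"final_predictor_layer_{layer_idx}_lora_{args_lora_size}pct.pt"
print(config_lora_size, ckpt_name)              # 0.04 final_predictor_layer_0_lora_4.0pct.pt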

requirements.txt

Lines changed: 1 addition & 0 deletions
@@ -14,3 +14,4 @@ wandb
 ninja
 timm
 pillow
+lm-eval
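With lm-eval now in requirements, the evaluation flow in downstream_eval.py reduces to wrapping the Hugging Face model in HFLM and handing it to the harness. A minimal sketch, assuming lm-eval's simple_evaluate entry point; the model name and task list here are placeholders, not what the script uses.

import lm_eval
from lm_eval.models.huggingface import HFLM
from lm_eval.utils import make_table
from transformers import AutoModelForCausalLM

# Placeholder model; downstream_eval.py loads args.model_name_or_config instead.
model = AutoModelForCausalLM.from_pretrained("gpt2")
wrapped_model = HFLM(pretrained=model, batch_size=8, device="cpu")

# Placeholder task; the script builds its task list from its own arguments.
results = lm_eval.simple_evaluate(model=wrapped_model, tasks=["hellaswag"])
print(make_table(results))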

src/modeling_skip.py

Lines changed: 3 additions & 2 deletions
@@ -64,7 +64,7 @@ def __init__(self, hidden_size: int, intermediate_size: int, sparsity: float, bi
 
         # Initialize mask but defer WeightCache creation until post_init
         self.init_mask = torch.ones(intermediate_size, dtype=torch.bool)
-        self.init_mask[int(intermediate_size * sparsity):] = 0
+        self.init_mask[int(intermediate_size * (1-sparsity)):] = 0
 
         self.weight_cache : Optional[WeightCache] = None
 
@@ -120,7 +120,8 @@ def __init__(self, config: PretrainedConfig, layer_idx: int):
 
         intermediate_size = config.intermediate_size[layer_idx] if isinstance(config.intermediate_size, list) \
             else config.intermediate_size
-        self.lora_size = int(intermediate_size * 0.04)
+        lora_pct = 0.04 if not hasattr(config, "lora_size") else config.lora_size
+        self.lora_size = int(intermediate_size * lora_pct)
         self.mlp_lora_proj = FastLoRAProjection(
             config.hidden_size,
             intermediate_size,
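The hasattr fallback keeps existing configs working: a config without a lora_size attribute still gets the previously hard-coded 4%, while downstream_eval.py can now override it. A sketch with a hypothetical helper and an example intermediate size (both are illustrative, not from the repo):

from types import SimpleNamespace

def resolve_lora_size(config, intermediate_size):
    # Mirrors the new logic: fall back to 4% when the config carries no lora_size.
    lora_pct = 0.04 if not hasattr(config, "lora_size") else config.lora_size
    return int(intermediate_size * lora_pct)

print(resolve_lora_size(SimpleNamespace(), 11008))                # 440 (default 4%)
print(resolve_lora_size(SimpleNamespace(lora_size=0.02), 11008))  # 220 (--lora_size 2.0)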
