Skip to content

Commit a2c40a5

Browse files
committed
-- added save and load model unit test
1 parent 756be88 commit a2c40a5

File tree

1 file changed

+141
-0
lines changed

1 file changed

+141
-0
lines changed

tests/test_common.py

Lines changed: 141 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,141 @@
1+
#!/usr/bin/env python
2+
"""Tests for `pytorch_tabular` package."""
3+
4+
import pytest
5+
import numpy as np
6+
import torch
7+
from sklearn.preprocessing import PowerTransformer
8+
from pytorch_tabular.config import DataConfig, OptimizerConfig, TrainerConfig
9+
from pytorch_tabular.models import CategoryEmbeddingModelConfig, AutoIntConfig, NodeConfig, TabNetModelConfig, CategoryEmbeddingMDNConfig
10+
from pytorch_tabular import TabularModel
11+
from pytorch_tabular.categorical_encoders import CategoricalEmbeddingTransformer
12+
13+
# Model config classes that the parametrized tests below are run against.
# Each entry is the class itself (not an instance); the test instantiates it.
MODEL_CONFIGS = [
    CategoryEmbeddingModelConfig,
    AutoIntConfig,
    NodeConfig,
    TabNetModelConfig,
    CategoryEmbeddingMDNConfig,
]
20+
21+
22+
def fake_metric(y_hat, y):
    """Return the mean of the signed difference ``y_hat - y``.

    Used below as a stand-in custom metric when parametrizing
    ``test_save_load``.

    Args:
        y_hat: Predicted values; must support subtraction with ``y`` and
            expose ``.mean()`` on the result.
        y: Target values.

    Returns:
        The result of ``(y_hat - y).mean()``.
    """
    signed_error = y_hat - y
    return signed_error.mean()
24+
25+
26+
@pytest.mark.parametrize(
    "model_config_class",
    MODEL_CONFIGS,
)
@pytest.mark.parametrize(
    "continuous_cols",
    [
        [
            "AveRooms",
            "AveBedrms",
            "Population",
            "AveOccup",
            "Latitude",
            "Longitude",
        ],
    ],
)
@pytest.mark.parametrize("categorical_cols", [["HouseAgeBin"]])
@pytest.mark.parametrize("custom_metrics", [None, [fake_metric]])
@pytest.mark.parametrize("custom_loss", [None, torch.nn.L1Loss()])
@pytest.mark.parametrize("custom_optimizer", [None, torch.optim.Adagrad])
def test_save_load(
    regression_data,
    model_config_class,
    continuous_cols,
    categorical_cols,
    custom_metrics,
    custom_loss,
    custom_optimizer,
    tmpdir,
):
    """Fit a model, save it, reload it, and compare the first test metric.

    The model is trained for a few epochs, evaluated on the test split,
    saved to a directory inside pytest's ``tmpdir``, loaded back through
    ``TabularModel.load_from_checkpoint`` and evaluated again. The first
    reported test metric must be identical before and after the round trip.

    Args:
        regression_data: Fixture yielding ``(train, test, target)``.
        model_config_class: Model config class to instantiate with
            ``task="regression"``.
        continuous_cols: Names of the continuous feature columns.
        categorical_cols: Names of the categorical feature columns.
        custom_metrics: Optional list of metric callables passed to ``fit``.
        custom_loss: Optional loss module passed to ``fit``.
        custom_optimizer: Optional optimizer class passed to ``fit``.
        tmpdir: pytest temporary-directory fixture.
    """
    (train, test, target) = regression_data
    data_config = DataConfig(
        target=target,
        continuous_cols=continuous_cols,
        categorical_cols=categorical_cols,
    )
    model_config = model_config_class(task="regression")
    trainer_config = TrainerConfig(
        max_epochs=3, checkpoints=None, early_stopping=None, gpus=0
    )
    optimizer_config = OptimizerConfig()

    tabular_model = TabularModel(
        data_config=data_config,
        model_config=model_config,
        optimizer_config=optimizer_config,
        trainer_config=trainer_config,
    )
    tabular_model.fit(
        train=train,
        test=test,
        metrics=custom_metrics,
        loss=custom_loss,
        optimizer=custom_optimizer,
        # An (empty) params dict is only supplied alongside a custom optimizer.
        optimizer_params=None if custom_optimizer is None else {},
    )

    result_1 = tabular_model.evaluate(test)

    # Save into the directory actually created under tmpdir. Passing the bare
    # relative name "save_model" would write into the current working
    # directory instead, leaving the tmp directory unused and leaking
    # artifacts between test runs.
    save_dir = str(tmpdir.mkdir("save_model"))
    tabular_model.save_model(save_dir)
    new_mdl = TabularModel.load_from_checkpoint(save_dir)
    result_2 = new_mdl.evaluate(test)

    original_key = f'test_{tabular_model.model.hparams.metrics[0]}'
    reloaded_key = f'test_{new_mdl.model.hparams.metrics[0]}'
    assert result_1[0][original_key] == result_2[0][reloaded_key]
92+
93+
# import numpy as np
94+
# import pandas as pd
95+
# from sklearn.datasets import fetch_california_housing, fetch_covtype
96+
# from pathlib import Path
97+
98+
# def regression_data():
99+
# dataset = fetch_california_housing(data_home="data", as_frame=True)
100+
# df = dataset.frame.sample(5000)
101+
# df["HouseAgeBin"] = pd.qcut(df["HouseAge"], q=4)
102+
# df["HouseAgeBin"] = "age_" + df.HouseAgeBin.cat.codes.astype(str)
103+
# test_idx = df.sample(int(0.2 * len(df)), random_state=42).index
104+
# test = df[df.index.isin(test_idx)]
105+
# train = df[~df.index.isin(test_idx)]
106+
# return (train, test, dataset.target_names)
107+
108+
109+
# def classification_data():
110+
# dataset = fetch_covtype(data_home="data")
111+
# data = np.hstack([dataset.data, dataset.target.reshape(-1, 1)])[:10000, :]
112+
# col_names = [f"feature_{i}" for i in range(data.shape[-1])]
113+
# col_names[-1] = "target"
114+
# data = pd.DataFrame(data, columns=col_names)
115+
# data["feature_0_cat"] = pd.qcut(data["feature_0"], q=4)
116+
# data["feature_0_cat"] = "feature_0_" + data.feature_0_cat.cat.codes.astype(str)
117+
# test_idx = data.sample(int(0.2 * len(data)), random_state=42).index
118+
# test = data[data.index.isin(test_idx)]
119+
# train = data[~data.index.isin(test_idx)]
120+
# return (train, test, ["target"])
121+
122+
123+
# test_save_load(
124+
# regression_data(),
125+
# model_config_class=CategoryEmbeddingModelConfig,
126+
# continuous_cols=[
127+
# "AveRooms",
128+
# "AveBedrms",
129+
# "Population",
130+
# "AveOccup",
131+
# "Latitude",
132+
# "Longitude",
133+
# ],
134+
# categorical_cols=[],
135+
# custom_metrics = None, #[fake_metric],
136+
# custom_loss = None, custom_optimizer = None,
137+
# tmpdir = Path("tmp")
138+
# )
139+
# test_embedding_transformer(regression_data())
140+
141+
# classification_data()

0 commit comments

Comments
 (0)