
Commit c4f1583

committed
-- added unit test cases - FTTransformer
1 parent 38166cf commit c4f1583

File tree

1 file changed: +233 -0


tests/test_ft_transformer.py

Lines changed: 233 additions & 0 deletions
@@ -0,0 +1,233 @@
#!/usr/bin/env python
"""Tests for `pytorch_tabular` package."""
import pytest

from pytorch_tabular.config import DataConfig, OptimizerConfig, TrainerConfig
from pytorch_tabular.models import FTTransformerConfig
from pytorch_tabular import TabularModel
from pytorch_tabular.categorical_encoders import CategoricalEmbeddingTransformer


@pytest.mark.parametrize("multi_target", [True, False])
@pytest.mark.parametrize(
    "continuous_cols",
    [
        [
            "AveRooms",
            "AveBedrms",
            "Population",
            "AveOccup",
            "Latitude",
            "Longitude",
        ],
    ],
)
@pytest.mark.parametrize("categorical_cols", [["HouseAgeBin"]])
@pytest.mark.parametrize("continuous_feature_transform", [None])
@pytest.mark.parametrize("normalize_continuous_features", [True])
@pytest.mark.parametrize("target_range", [True, False])
def test_regression(
    regression_data,
    multi_target,
    continuous_cols,
    categorical_cols,
    continuous_feature_transform,
    normalize_continuous_features,
    target_range,
):
    (train, test, target) = regression_data
    if len(continuous_cols) + len(categorical_cols) == 0:
        assert True
    else:
        data_config = DataConfig(
            target=target + ["MedInc"] if multi_target else target,
            continuous_cols=continuous_cols,
            categorical_cols=categorical_cols,
            continuous_feature_transform=continuous_feature_transform,
            normalize_continuous_features=normalize_continuous_features,
        )
        model_config_params = dict(
            task="regression",
            input_embed_dim=8,
            num_attn_blocks=1,
            num_heads=2,
        )
        if target_range:
            _target_range = []
            for target in data_config.target:
                _target_range.append(
                    (
                        train[target].min().item(),
                        train[target].max().item(),
                    )
                )
            model_config_params["target_range"] = _target_range
        model_config = FTTransformerConfig(**model_config_params)
        trainer_config = TrainerConfig(
            max_epochs=1, checkpoints=None, early_stopping=None, gpus=None, fast_dev_run=True
        )
        optimizer_config = OptimizerConfig()

        tabular_model = TabularModel(
            data_config=data_config,
            model_config=model_config,
            optimizer_config=optimizer_config,
            trainer_config=trainer_config,
        )
        tabular_model.fit(train=train, test=test)

        result = tabular_model.evaluate(test)
        assert "test_mean_squared_error" in result[0].keys()
        pred_df = tabular_model.predict(test)
        assert pred_df.shape[0] == test.shape[0]


@pytest.mark.parametrize(
    "continuous_cols",
    [
        [f"feature_{i}" for i in range(54)],
    ],
)
@pytest.mark.parametrize("categorical_cols", [["feature_0_cat"]])
@pytest.mark.parametrize("continuous_feature_transform", [None])
@pytest.mark.parametrize("normalize_continuous_features", [True])
def test_classification(
    classification_data,
    continuous_cols,
    categorical_cols,
    continuous_feature_transform,
    normalize_continuous_features,
):
    (train, test, target) = classification_data
    if len(continuous_cols) + len(categorical_cols) == 0:
        assert True
    else:
        data_config = DataConfig(
            target=target,
            continuous_cols=continuous_cols,
            categorical_cols=categorical_cols,
            continuous_feature_transform=continuous_feature_transform,
            normalize_continuous_features=normalize_continuous_features,
        )
        model_config_params = dict(
            task="classification",
            input_embed_dim=8,
            num_attn_blocks=1,
            num_heads=2,
        )
        model_config = FTTransformerConfig(**model_config_params)
        trainer_config = TrainerConfig(
            max_epochs=1, checkpoints=None, early_stopping=None, gpus=None, fast_dev_run=True
        )
        optimizer_config = OptimizerConfig()

        tabular_model = TabularModel(
            data_config=data_config,
            model_config=model_config,
            optimizer_config=optimizer_config,
            trainer_config=trainer_config,
        )
        tabular_model.fit(train=train, test=test)

        result = tabular_model.evaluate(test)
        assert "test_accuracy" in result[0].keys()
        pred_df = tabular_model.predict(test)
        assert pred_df.shape[0] == test.shape[0]


def test_embedding_transformer(regression_data):
    (train, test, target) = regression_data
    data_config = DataConfig(
        target=target,
        continuous_cols=[
            "AveRooms",
            "AveBedrms",
            "Population",
            "AveOccup",
            "Latitude",
            "Longitude",
        ],
        categorical_cols=["HouseAgeBin"],
    )
    model_config_params = dict(
        task="regression",
        input_embed_dim=8,
        num_attn_blocks=1,
        num_heads=2,
    )
    model_config = FTTransformerConfig(**model_config_params)
    trainer_config = TrainerConfig(
        max_epochs=1, checkpoints=None, early_stopping=None, gpus=None, fast_dev_run=True
    )
    optimizer_config = OptimizerConfig()

    tabular_model = TabularModel(
        data_config=data_config,
        model_config=model_config,
        optimizer_config=optimizer_config,
        trainer_config=trainer_config,
    )
    tabular_model.fit(train=train, test=test)

    transformer = CategoricalEmbeddingTransformer(tabular_model)
    train_transform = transformer.fit_transform(train)
    embed_cols = [
        col for col in train_transform.columns if "HouseAgeBin_embed_dim" in col
    ]
    assert len(train["HouseAgeBin"].unique()) + 1 == len(
        transformer._mapping["HouseAgeBin"].keys()
    )
    assert all(
        [
            val.shape[0] == len(embed_cols)
            for val in transformer._mapping["HouseAgeBin"].values()
        ]
    )


# import numpy as np
# import pandas as pd
# from sklearn.datasets import fetch_california_housing, fetch_covtype


# def regression_data():
#     dataset = fetch_california_housing(data_home="data", as_frame=True)
#     df = dataset.frame.sample(5000)
#     df["HouseAgeBin"] = pd.qcut(df["HouseAge"], q=4)
#     df["HouseAgeBin"] = "age_" + df.HouseAgeBin.cat.codes.astype(str)
#     test_idx = df.sample(int(0.2 * len(df)), random_state=42).index
#     test = df[df.index.isin(test_idx)]
#     train = df[~df.index.isin(test_idx)]
#     return (train, test, dataset.target_names)


# def classification_data():
#     dataset = fetch_covtype(data_home="data")
#     data = np.hstack([dataset.data, dataset.target.reshape(-1, 1)])[:10000, :]
#     col_names = [f"feature_{i}" for i in range(data.shape[-1])]
#     col_names[-1] = "target"
#     data = pd.DataFrame(data, columns=col_names)
#     data["feature_0_cat"] = pd.qcut(data["feature_0"], q=4)
#     data["feature_0_cat"] = "feature_0_" + data.feature_0_cat.cat.codes.astype(str)
#     test_idx = data.sample(int(0.2 * len(data)), random_state=42).index
#     test = data[data.index.isin(test_idx)]
#     train = data[~data.index.isin(test_idx)]
#     return (train, test, ["target"])


# test_regression(
#     regression_data(),
#     multi_target=True,
#     continuous_cols=[
#         "AveRooms",
#         "AveBedrms",
#         "Population",
#         "AveOccup",
#         "Latitude",
#         "Longitude",
#     ],
#     categorical_cols=[],
#     continuous_feature_transform="yeo-johnson",
#     normalize_continuous_features=False,
#     target_range=True,
# )
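
The tests above request `regression_data` and `classification_data` fixtures that are not defined in this diff; the commented-out helpers at the end of the file suggest how they build their data. A minimal `conftest.py` sketch along those lines is shown here; treating them as pytest fixtures and the exact sampling sizes are assumptions, not something this commit specifies.

# conftest.py (sketch, assuming the fixtures mirror the commented-out helpers above)
import numpy as np
import pandas as pd
import pytest
from sklearn.datasets import fetch_california_housing, fetch_covtype


@pytest.fixture
def regression_data():
    # California Housing: 5000 sampled rows, HouseAge binned into a categorical column
    dataset = fetch_california_housing(data_home="data", as_frame=True)
    df = dataset.frame.sample(5000)
    df["HouseAgeBin"] = pd.qcut(df["HouseAge"], q=4)
    df["HouseAgeBin"] = "age_" + df.HouseAgeBin.cat.codes.astype(str)
    test_idx = df.sample(int(0.2 * len(df)), random_state=42).index
    test = df[df.index.isin(test_idx)]
    train = df[~df.index.isin(test_idx)]
    return (train, test, dataset.target_names)


@pytest.fixture
def classification_data():
    # Covertype: first 10000 rows, target appended as the last column,
    # feature_0 binned into a categorical column
    dataset = fetch_covtype(data_home="data")
    data = np.hstack([dataset.data, dataset.target.reshape(-1, 1)])[:10000, :]
    col_names = [f"feature_{i}" for i in range(data.shape[-1])]
    col_names[-1] = "target"
    data = pd.DataFrame(data, columns=col_names)
    data["feature_0_cat"] = pd.qcut(data["feature_0"], q=4)
    data["feature_0_cat"] = "feature_0_" + data.feature_0_cat.cat.codes.astype(str)
    test_idx = data.sample(int(0.2 * len(data)), random_state=42).index
    test = data[data.index.isin(test_idx)]
    train = data[~data.index.isin(test_idx)]
    return (train, test, ["target"])

With fixtures like these in place, the new tests can be run with `pytest tests/test_ft_transformer.py`.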
