Skip to content

Commit c13d106

Browse files
committed
Added a simulation script for python BCF
1 parent 455ba21 commit c13d106

File tree

2 files changed

+71
-3
lines changed

2 files changed

+71
-3
lines changed

demo/debug/bcf-pred-rmse.py

Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,68 @@
1+
# Load libraries
2+
from stochtree import BCFModel
3+
import numpy as np
4+
from sklearn.model_selection import train_test_split
5+
from scipy.stats import norm
6+
7+
# Simulation parameters
8+
n = 250
9+
p = 50
10+
n_sim = 100
11+
test_set_pct = 0.2
12+
rng = np.random.default_rng()
13+
14+
# Simulation containers
15+
rmses_cached = np.empty(n_sim)
16+
rmses_pred = np.empty(n_sim)
17+
18+
# Run the simulation
19+
for i in range(n_sim):
20+
# Generate data
21+
X = rng.normal(loc=0.0, scale=1.0, size=(n, p))
22+
mu_X = X[:, 0]
23+
tau_X = 0.25 * X[:, 1]
24+
pi_X = norm.cdf(0.5 * X[:, 1])
25+
Z = rng.binomial(n=1, p=pi_X, size=(n,))
26+
E_XZ = mu_X + tau_X * Z
27+
snr = 2.0
28+
noise_sd = np.std(E_XZ) / snr
29+
y = E_XZ + rng.normal(loc=0.0, scale=noise_sd, size=(n,))
30+
31+
# Train-test split
32+
sample_inds = np.arange(n)
33+
train_inds, test_inds = train_test_split(sample_inds, test_size=test_set_pct)
34+
X_train = X[train_inds, :]
35+
X_test = X[test_inds, :]
36+
Z_train = Z[train_inds]
37+
Z_test = Z[test_inds]
38+
pi_train = pi_X[train_inds]
39+
pi_test = pi_X[test_inds]
40+
tau_train = tau_X[train_inds]
41+
tau_test = tau_X[test_inds]
42+
mu_train = mu_X[train_inds]
43+
mu_test = mu_X[test_inds]
44+
y_train = y[train_inds]
45+
y_test = y[test_inds]
46+
E_XZ_train = E_XZ[train_inds]
47+
E_XZ_test = E_XZ[test_inds]
48+
49+
# Fit simple BCF model
50+
bcf_model = BCFModel()
51+
bcf_model.sample(
52+
X_train=X_train,
53+
Z_train=Z_train,
54+
pi_train=pi_train,
55+
y_train=y_train,
56+
X_test=X_test,
57+
Z_test=Z_test,
58+
pi_test=pi_test,
59+
)
60+
61+
# Predict out of sample
62+
y_hat_test = bcf_model.predict(X=X_test, Z=Z_test, propensity=pi_test, type="mean", terms = "y_hat")
63+
64+
# Compute RMSE using both cached predictions and those returned by predict()
65+
rmses_cached[i] = np.sqrt(np.mean(np.power(np.mean(bcf_model.y_hat_test, axis = 1) - E_XZ_test, 2.0)))
66+
rmses_pred[i] = np.sqrt(np.mean(np.power(y_hat_test - E_XZ_test, 2.0)))
67+
68+
print(f"Average RMSE, cached: {np.mean(rmses_cached):.4f}, out-of-sample pred: {np.mean(rmses_pred):.4f}")

tools/simulations/bcf-pred-rmse.R

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,12 +2,12 @@
22
library(stochtree)
33

44
# Simulation parameters
5-
n <- 500
6-
p <- 5
5+
n <- 250
6+
p <- 50
77
n_sim <- 100
88
test_set_pct <- 0.2
99

10-
# Simulation container
10+
# Simulation containers
1111
rmses_cached <- rep(NA_real_, n_sim)
1212
rmses_pred <- rep(NA_real_, n_sim)
1313

0 commit comments

Comments
 (0)