diff --git a/elfi/examples/example_bolfi_invocation.py b/elfi/examples/example_bolfi_invocation.py new file mode 100644 index 00000000..60f71f32 --- /dev/null +++ b/elfi/examples/example_bolfi_invocation.py @@ -0,0 +1,314 @@ +import elfi +import matplotlib +import matplotlib.pyplot as plt +import numpy as np +import scipy.stats as ss + +from elfi import BOLFI +from matplotlib.animation import FuncAnimation, FFMpegWriter + +seed = 0 + +# Number of minimum samples. +n_low = 1 +# Number of maximum samples. +n = 5 +np.random.seed(seed) +gauss_samples = ss.norm().rvs(size=n) +""" +t_plus_range = [0.36, 0.44] +t_plus = np.linspace(*t_plus_range, 200) +model = lambda t_plus: t_plus**2 +x_range = [0.36, 0.44] +y_range = [-0.8, 1.5] +canvas = np.meshgrid(np.linspace(*x_range, 200), + np.linspace(*reversed(y_range), 200)) +t_plus_samples = t_plus_range.copy() + +fit_spline = [] +fit_spline_uncertainty = [] +fit_acq = [] +fit_discrepancy = [] +fit_acq_t_plus = [] + +prior = elfi.Prior('norm', 0.4, 0.02**2, name='t_+') +elfi_simulator = elfi.Simulator( + lambda *args, **_: model(args[0]) + ss.norm().rvs(size=1)[0], + prior, observed=model(0.415) +) +processed_data = elfi.Summary(lambda data: data, elfi_simulator) +distance = elfi.Distance('euclidean', processed_data) +log_distance = elfi.Operation(np.log, distance) +bolfi = elfi.BOLFI( + log_distance, initial_evidence=10, bounds={'t_+': t_plus_range}, seed=seed +) +for i in range(20): + bolfi.fit(n_evidence=10 + i + 1, bar=True) + #plt.plot(t_plus, bolfi.target_model.predict(t_plus, noiseless=True)[0]) + #plt.show() +""" + +t_plus_range = [0.36, 0.44] +t_plus = np.linspace(*t_plus_range, 200) + + +def model(t_plus): + return t_plus**2 + + +x_range = [0.358, 0.442] +t_plus_eval = np.linspace(*x_range, 200) +y_range = [-0.012, 0.052] +canvas = np.meshgrid(np.linspace(*x_range, 200), + np.linspace(*reversed(y_range), 200)) +t_plus_samples = t_plus_range.copy() + +fit_spline = [] +fit_spline_uncertainty = [] +fit_acq = [] +fit_discrepancy = [] +fit_acq_t_plus = [] + +prior = elfi.Prior('norm', 0.4, 0.02, name='t_+') +elfi_simulator = elfi.Simulator( + lambda *args, **_: model(args[0]) + 0.001 * ss.norm().rvs(size=1)[0], + prior, observed=model(0.415) +) +processed_data = elfi.Summary(lambda data: data, elfi_simulator) +distance = elfi.Distance('euclidean', processed_data) +# log_distance = elfi.Operation(np.log, distance) + + +class Animate_GP: + def __init__(self, fig, ax, initial_evidence=0, frames=None): + self.bolfi = elfi.BOLFI( + distance, + initial_evidence=initial_evidence, + bounds={'t_+': t_plus_range}, + seed=seed, + batch_size=10, + ) + self.ax = ax + self.ax.set_xlim(*x_range) + self.ax.set_ylim(*y_range) + self.ax.xaxis.set_ticks([]) + self.ax.yaxis.set_ticks([]) + self.ax.set_xlabel("model parameter") + self.ax.set_ylabel("discrepancy") + self.initial_evidence = initial_evidence + self.line, = self.ax.plot([], []) + self.acq, = self.ax.plot([], []) + self.img = self.ax.imshow( + 0 * canvas[0], + cmap='gist_yarg', + vmin=0, + vmax=1, + aspect='auto', + extent=[*x_range, *y_range] + ) + self.pts, = self.ax.plot( + [], [], lw=0, marker='o', + color=plt.rcParams['axes.prop_cycle'].by_key()['color'][0] + ) + trans = matplotlib.transforms.ScaledTranslation( + 240/72, -10/72, fig.dpi_scale_trans + ) + self.txt = self.ax.text( + 0.0, 1.0, str(self.initial_evidence) + " samples ", + transform=ax.transAxes + trans, verticalalignment='top' + ) + self.frames = frames + + def __call__(self, i): + if self.frames is not None: + n_evidence = 
self.initial_evidence + self.frames[i] + else: + n_evidence = self.initial_evidence + i + self.bolfi.fit(n_evidence=n_evidence) + mean, var = self.bolfi.target_model.predict( + t_plus_eval, noiseless=True + ) + self.line.set_data(t_plus_eval, mean) + eta_squared = 2 * np.log( + n_evidence**4 * np.pi**2 / 0.3 + ) + self.acq.set_data(t_plus_eval, mean - np.sqrt(eta_squared * var)) + unc = ( + np.exp(-(canvas[1] - mean.T)**2 / (2 * var.T)) + / np.sqrt(2 * np.pi * var.T) + ) + self.ax.imshow( + unc, + cmap='gist_yarg', + vmin=0, + vmax=np.max(unc), + aspect='auto', + extent=[*x_range, *y_range] + ) + self.pts.set_data( + self.bolfi.target_model._gp.X, self.bolfi.target_model._gp.Y + ) + self.txt.set_text(str(n_evidence) + " samples") + return self.line, self.acq, self.img, self.pts, self.txt + + +initial_evidence = 3 +frames = np.array([3, 4, 13, 14]) - initial_evidence + +fig, ax = plt.subplots(figsize=(6, 4)) +anim_gp = Animate_GP(fig, ax, initial_evidence=initial_evidence, frames=frames) +anim = FuncAnimation( + fig, anim_gp, frames=4, interval=200, blit=False, repeat=False +) + +bolfi = elfi.BOLFI( + distance, + initial_evidence=initial_evidence, + bounds={'t_+': t_plus_range}, + seed=seed, + batch_size=10, +) + +likelihood_range = [0.405, 0.425] +likelihood_eval = np.linspace(*likelihood_range, 200) +gps_at_each_frame = [] + +fig_static, axes = plt.subplots(figsize=(6, 4), nrows=2, ncols=2) + +for i, ax in enumerate(axes.flatten()): + ax.xaxis.set_ticks([]) + ax.yaxis.set_ticks([]) + ax.set_xlim(*x_range) + ax.set_ylim(*y_range) + if frames is not None: + n_evidence = initial_evidence + frames[i] + else: + n_evidence = initial_evidence + i + bolfi.fit(n_evidence=n_evidence) + trans = matplotlib.transforms.ScaledTranslation( + 126/72, -5/72, fig.dpi_scale_trans + ) + mean, var = bolfi.target_model.predict(t_plus_eval, noiseless=True) + gps_at_each_frame.append( + bolfi.target_model.predict(likelihood_eval, noiseless=True) + ) + ax.plot(t_plus_eval, mean) + eta_squared = 2 * np.log( + n_evidence**4 * np.pi**2 / 0.3 + ) + ax.plot(t_plus_eval, mean - np.sqrt(eta_squared * var)) + unc = ( + np.exp(-(canvas[1] - mean.T)**2 / (2 * var.T)) + / np.sqrt(2 * np.pi * var.T) + ) + ax.imshow( + unc, + cmap='gist_yarg', + vmin=0, + vmax=np.max(unc), + aspect='auto', + extent=[*x_range, *y_range] + ) + ax.scatter( + bolfi.target_model._gp.X, bolfi.target_model._gp.Y + ) + ax.text( + 0.0, 1.0, str(n_evidence) + " samples ", + transform=ax.transAxes + trans, + verticalalignment='top', horizontalalignment='right' + ) +fig_static.supxlabel(r"Model parameter $\theta$") +fig_static.supylabel("Model-data distance") +fig_static.tight_layout() + +posterior = bolfi.extract_posterior() +threshold = posterior.threshold +likelihood_yrange = [-0.0012, 0.0082] +likelihood_canvas = np.meshgrid( + np.linspace(*likelihood_range, 200), + np.linspace(*reversed(likelihood_yrange), 200) +) +mean, var = bolfi.target_model.predict(likelihood_eval, noiseless=True) +likelihood = ss.norm().cdf((threshold - mean) / np.sqrt(var)) +normalizing = np.sum( + 0.5 + * (likelihood[:-1] + likelihood[1:]) + * (t_plus_eval[1:] - t_plus_eval[:-1]) +) + +fig_int, (ax_gp, ax_int) = plt.subplots(figsize=(6, 4), nrows=2) +ax_gp.set_xlim(*likelihood_range) +ax_int.set_xlim(*likelihood_range) +ax_gp.xaxis.set_ticks([]) +ax_int.xaxis.set_ticks([]) +ax_gp.yaxis.set_ticks([]) +ax_int.yaxis.set_ticks([]) +ax_gp.set_ylabel(r"$\log||y_i(\theta)-y_i^\star||$") +ax_int.set_ylabel(r"$L_K(\theta)$") +ax_int.set_xlabel(r"Model parameter $\theta$") +trans_int = 
matplotlib.transforms.ScaledTranslation( + 10/72, -5/72, fig_int.dpi_scale_trans +) +ax_gp.text(0.0, 1.0, '(a)', transform=ax_gp.transAxes + trans_int, + verticalalignment='top', fontsize=20) +ax_int.text(0.0, 1.0, '(b)', transform=ax_int.transAxes + trans_int, + verticalalignment='top', fontsize=20) +trans_samples = matplotlib.transforms.ScaledTranslation( + 162/72, -10/72, fig.dpi_scale_trans +) +ax_gp.text( + 0.0, 1.0, str(n_evidence) + " samples ", + transform=ax_gp.transAxes + trans_samples, verticalalignment='top' +) +ax_gp.plot(likelihood_eval, mean) +ax_gp.plot(likelihood_eval, mean - np.sqrt(eta_squared * var)) +ax_gp.plot(likelihood_eval, [threshold] * len(likelihood_eval)) +ax_gp.scatter( + bolfi.target_model._gp.X, bolfi.target_model._gp.Y +) +truncated_unc = ( + np.exp(-(likelihood_canvas[1] - mean.T)**2 / (2 * var.T)) + / np.sqrt(2 * np.pi * var.T) +) / normalizing * (likelihood_canvas[1] < threshold) +ax_gp.imshow( + truncated_unc, + cmap='gist_yarg', + vmin=0, + vmax=np.max(truncated_unc), + aspect='auto', + extent=[*likelihood_range, *likelihood_yrange] +) +ax_int.plot( + likelihood_eval, + likelihood / normalizing, + color=plt.rcParams['axes.prop_cycle'].by_key()['color'][4] +) + +fig_int.tight_layout() + +eps_min = -0.005 +eps_max = 0.04 +threshold_eval = np.linspace(eps_min, eps_max, 200) +norm = matplotlib.colors.Normalize(eps_min, eps_max) +cmap = plt.get_cmap('viridis') +fig_eps, ax_eps = plt.subplots( + figsize=(4 * 2**0.5, 4), constrained_layout=True) +# mean, var = gps_at_each_frame[2] +for eps in threshold_eval: + likelihood_eps = ss.norm().cdf((eps - mean) / np.sqrt(var)) + normalizing_eps = np.sum( + 0.5 + * (likelihood_eps[:-1] + likelihood_eps[1:]) + * (t_plus_eval[1:] - t_plus_eval[:-1]) + ) + ax_eps.plot( + likelihood_eval, + likelihood_eps / normalizing_eps, + color=cmap(norm(eps)) + ) +ax_eps.set_title("Normalized Likelihoods") +fig_eps.colorbar(matplotlib.cm.ScalarMappable( + norm=norm, cmap=cmap +), ax=ax_eps, label="threshold") + +plt.show() diff --git a/elfi/examples/example_gpytorchbolfi_invocation.py b/elfi/examples/example_gpytorchbolfi_invocation.py new file mode 100644 index 00000000..6b744094 --- /dev/null +++ b/elfi/examples/example_gpytorchbolfi_invocation.py @@ -0,0 +1,321 @@ +import torch +# import pyro +import elfi +import matplotlib +import matplotlib.pyplot as plt +import numpy as np +import scipy.stats as ss + +from elfi.methods.inference.gpytorch_bolfi import GPyTorchBOLFI +from copy import deepcopy +from matplotlib.animation import FuncAnimation, FFMpegWriter +from torch import tensor + +seed = 0 + +np.random.seed(seed) +t_plus_range = [0.36, 0.44] +t_plus = np.linspace(*t_plus_range, 200) + + +def model(t_plus): + return t_plus**2 + + +x_range = [0.358, 0.442] +t_plus_eval = np.linspace(*x_range, 200) +y_range = [-0.012, 0.052] +canvas = np.meshgrid(np.linspace(*x_range, 200), + np.linspace(*reversed(y_range), 200)) +t_plus_samples = t_plus_range.copy() + +fit_spline = [] +fit_spline_uncertainty = [] +fit_acq = [] +fit_discrepancy = [] +fit_acq_t_plus = [] + +prior = elfi.Prior('norm', 0.4, 0.02, name='t_+') +# pyro_torch_prior = pyro.distributions.Normal(0.4, 0.02) +elfi_simulator = elfi.Simulator( + lambda *args, **_: model(args[0]) + 0.001 * ss.norm().rvs(size=1)[0], + prior, observed=model(0.415) +) +processed_data = elfi.Summary(lambda data: data, elfi_simulator) +distance = elfi.Distance('euclidean', processed_data) +# log_distance = elfi.Operation(np.log, distance) + +class Animate_GP: + def __init__(self, fig, ax, 
initial_evidence=0, frames=None): + + self.bolfi = GPyTorchBOLFI( + distance, + initial_evidence=initial_evidence, + bounds={'t_+': t_plus_range}, + seed=seed, + max_opt_iters=200, + # pyro_torch_prior=pyro_torch_prior, + ) + self.ax = ax + self.ax_acq = self.ax.twinx() + self.ax.set_xlim(*x_range) + self.ax_acq.set_xlim(*x_range) + # self.ax.set_ylim(*y_range) + self.ax.xaxis.set_ticks([]) + self.ax.yaxis.set_ticks([]) + self.ax_acq.xaxis.set_ticks([]) + self.ax_acq.yaxis.set_ticks([]) + self.ax.set_xlabel("model parameter") + self.ax.set_ylabel("discrepancy") + self.initial_evidence = initial_evidence + self.line, = self.ax.plot([], []) + self.acq, = self.ax_acq.plot([], [], color="orange") + self.img = self.ax.imshow( + 0 * canvas[0], + cmap='gist_yarg', + vmin=0, + vmax=1, + aspect='auto', + extent=[*x_range, *y_range] + ) + self.pts, = self.ax.plot( + [], [], lw=0, marker='o', + color=plt.rcParams['axes.prop_cycle'].by_key()['color'][0] + ) + trans = matplotlib.transforms.ScaledTranslation( + 240/72, -10/72, fig.dpi_scale_trans + ) + self.txt = self.ax.text( + 0.0, 1.0, str(self.initial_evidence) + " samples ", + transform=ax.transAxes + trans, verticalalignment='top' + ) + self.frames = frames + self.target_models = [] + self.posteriors = [] + + def __call__(self, i): + if self.frames is not None: + n_evidence = self.initial_evidence + self.frames[i] + else: + n_evidence = self.initial_evidence + i + self.bolfi.fit(n_evidence=n_evidence) + mean, var = self.bolfi.target_model.predict( + t_plus_eval, noiseless=True + ) + self.line.set_data(t_plus_eval, mean) + self.acq.set_data(t_plus_eval, self.bolfi.acquisition_method.evaluate( + torch.tensor(t_plus_eval).unsqueeze(-1).unsqueeze(-1), + t=n_evidence - self.initial_evidence + ).detach().numpy()) + unc = ( + np.exp(-(canvas[1] - mean)**2 / (2 * var)) + / np.sqrt(2 * np.pi * var) + ) + self.ax.imshow( + unc, + cmap='gist_yarg', + vmin=0, + vmax=np.max(unc), + aspect='auto', + extent=[*x_range, *y_range] + ) + self.pts.set_data( + self.bolfi.target_model.X.T.detach().numpy(), + self.bolfi.target_model.Y.detach().numpy() + ) + self.txt.set_text(str(n_evidence) + " samples") + # Store the results. 
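+        # A deep copy keeps a snapshot of the GP for this frame; the
+        # static four-panel figure below re-plots these stored models
+        # and posteriors after the animation has run.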
+ self.target_models.append(deepcopy(self.bolfi.target_model)) + self.posteriors.append(self.bolfi.extract_posterior()) + return self.line, self.acq, self.pts, self.txt # ,self.img + + +initial_evidence = 10 +frames = np.array([i for i in range(10, 24)]) - initial_evidence +# initial_evidence = 10 +# frames = np.array([i for i in range(10, 41)]) - initial_evidence + +fig, ax = plt.subplots(figsize=(6, 4)) +anim_gp = Animate_GP(fig, ax, initial_evidence=initial_evidence, frames=frames) +anim = FuncAnimation( + fig, anim_gp, frames=len(frames), interval=200, blit=False, repeat=False +) +plt.show() + +likelihood_range = [0.405, 0.425] +likelihood_eval = np.linspace(*likelihood_range, 200) +gps_at_each_frame = [] + +fig_static, axes = plt.subplots(figsize=(6, 4), nrows=2, ncols=2) + +for i, ax in enumerate(axes.flatten()): + if i == 2: + i = -2 + if i == 3: + i = -1 + + ax.xaxis.set_ticks([]) + ax.yaxis.set_ticks([]) + ax.set_xlim(*x_range) + ax_acq = ax.twinx() + ax_acq.set_xlim(*x_range) + ax.set_ylim(*y_range) + target_model = anim_gp.target_models[i] + trans = matplotlib.transforms.ScaledTranslation( + 126/72, -5/72, fig.dpi_scale_trans + ) + mean, var = target_model.predict(t_plus_eval, noiseless=True) + gps_at_each_frame.append( + target_model.predict(likelihood_eval, noiseless=True) + ) + ax.plot(t_plus_eval, mean) + if frames is not None: + n_evidence = initial_evidence + frames[i] + else: + n_evidence = initial_evidence + i + ax_acq.plot(t_plus_eval, anim_gp.bolfi.acquisition_method.evaluate( + torch.tensor(t_plus_eval).unsqueeze(-1).unsqueeze(-1), + t=frames[-1] - initial_evidence + ).detach().numpy(), color="orange") + unc = ( + np.exp(-(canvas[1] - mean)**2 / (2 * var)) + / np.sqrt(2 * np.pi * var) + ) + ax.imshow( + unc, + cmap='gist_yarg', + vmin=0, + vmax=np.max(unc), + aspect='auto', + extent=[*x_range, *y_range] + ) + ax.scatter( + target_model.X.T.detach().numpy(), + target_model.Y.detach().numpy() + ) + ax.text( + 0.0, 1.0, str(n_evidence) + " samples ", + transform=ax.transAxes + trans, + verticalalignment='top', horizontalalignment='right' + ) +fig_static.supxlabel(r"Model parameter $\theta$") +fig_static.supylabel("Model-data distance") +fig_static.tight_layout() + +posterior = anim_gp.posteriors[-1] +target_model = anim_gp.target_models[-1] +threshold = posterior.threshold # .loc.detach().numpy() +likelihood_yrange = [-0.0012, 0.0082] +likelihood_canvas = np.meshgrid( + np.linspace(*likelihood_range, 200), + np.linspace(*reversed(likelihood_yrange), 200) +) +mean, var = target_model.predict(likelihood_eval, noiseless=True) +likelihood = anim_gp.bolfi.acquisition_method.evaluate( + torch.tensor(likelihood_eval).unsqueeze(-1).unsqueeze(-1), + t=frames[-1] - initial_evidence +).detach().numpy() +normalizing = np.sum( + 0.5 + * (likelihood[:-1] + likelihood[1:]) + * (t_plus_eval[1:] - t_plus_eval[:-1]) +) + +fig_int, (ax_gp, ax_int) = plt.subplots(figsize=(6, 4), nrows=2) +ax_gp_acq = ax_gp.twinx() +ax_gp.set_xlim(*likelihood_range) +ax_gp_acq.set_xlim(*likelihood_range) +ax_int.set_xlim(*likelihood_range) +ax_gp.xaxis.set_ticks([]) +ax_int.xaxis.set_ticks([]) +ax_gp_acq.xaxis.set_ticks([]) +ax_gp.yaxis.set_ticks([]) +ax_int.yaxis.set_ticks([]) +ax_gp_acq.yaxis.set_ticks([]) +ax_gp.set_ylabel(r"$\log||y_i(\theta)-y_i^\star||$") +ax_int.set_ylabel(r"$L_K(\theta)$") +ax_int.set_xlabel(r"Model parameter $\theta$") +trans_int = matplotlib.transforms.ScaledTranslation( + 10/72, -5/72, fig_int.dpi_scale_trans +) +ax_gp.text(0.0, 1.0, '(a)', transform=ax_gp.transAxes + trans_int, + 
verticalalignment='top', fontsize=20) +ax_int.text(0.0, 1.0, '(b)', transform=ax_int.transAxes + trans_int, + verticalalignment='top', fontsize=20) +trans_samples = matplotlib.transforms.ScaledTranslation( + 162/72, -10/72, fig.dpi_scale_trans +) +ax_gp.text( + 0.0, 1.0, str(n_evidence) + " samples ", + transform=ax_gp.transAxes + trans_samples, verticalalignment='top' +) +ax_gp.plot(likelihood_eval, mean) +ax_gp_acq.plot( + likelihood_eval, + anim_gp.bolfi.acquisition_method.evaluate( + torch.tensor(t_plus_eval).unsqueeze(-1).unsqueeze(-1), + t=frames[-1] - initial_evidence + ).detach().numpy(), + color="orange" +) +ax_gp.plot(likelihood_eval, [threshold] * len(likelihood_eval)) +ax_gp.scatter( + target_model.X.T.detach().numpy(), + target_model.Y.detach().numpy() +) +truncated_unc = ( + np.exp(-(likelihood_canvas[1] - mean)**2 / (2 * var)) + / np.sqrt(2 * np.pi * var) +) / normalizing * (likelihood_canvas[1] < threshold) +ax_gp.imshow( + truncated_unc, + cmap='gist_yarg', + vmin=0, + vmax=np.max(truncated_unc), + aspect='auto', + extent=[*likelihood_range, *likelihood_yrange] +) +ax_int.plot( + likelihood_eval, + likelihood / normalizing, + color=plt.rcParams['axes.prop_cycle'].by_key()['color'][4] +) + +fig_int.tight_layout() + +eps_min = -1 +eps_max = 1 +threshold_eval = np.linspace(eps_min, eps_max, 200) +norm = matplotlib.colors.Normalize(eps_min, eps_max) +cmap = plt.get_cmap('viridis') +fig_eps, ax_eps = plt.subplots( + figsize=(4 * 2**0.5, 4), constrained_layout=True +) +# mean, var = gps_at_each_frame[2] +for eps in threshold_eval: + likelihood_eps = ss.norm().cdf((eps - mean) / np.sqrt(var)) + normalizing_eps = np.sum( + 0.5 + * (likelihood_eps[:-1] + likelihood_eps[1:]) + * (t_plus_eval[1:] - t_plus_eval[:-1]) + ) + ax_eps.plot( + likelihood_eval, + likelihood_eps / normalizing_eps, + color=cmap(norm(eps)) + ) +ax_eps.set_title("Normalized Likelihoods") +fig_eps.colorbar(matplotlib.cm.ScalarMappable( + norm=norm, cmap=cmap +), ax=ax_eps, label="threshold") + +plt.show() + +print(anim_gp.bolfi.target_model._gp(torch.linspace(*x_range, 200))) +print(anim_gp.bolfi.target_model._gp.likelihood(torch.linspace(*x_range, 200))) +print(anim_gp.bolfi.target_model._gp.likelihood( + anim_gp.bolfi.target_model._gp(torch.linspace(*x_range, 200)) +)) + +sample_size = 20 +print(anim_gp.bolfi.sample(sample_size, threshold=0, n_chains=1)) diff --git a/elfi/methods/bo/botorch_acquisition.py b/elfi/methods/bo/botorch_acquisition.py new file mode 100644 index 00000000..1dd2f287 --- /dev/null +++ b/elfi/methods/bo/botorch_acquisition.py @@ -0,0 +1,134 @@ +# SPDX-FileCopyrightText: 2024 Yannick Kuhn +# +# SPDX-License-Identifier: BSD-3-Clause + + +from botorch.acquisition.analytic import UpperConfidenceBound +from botorch.generation.gen import gen_candidates_torch, get_best_candidates +from botorch.optim.initializers import gen_batch_initial_conditions +from elfi.methods.bo.acquisition import AcquisitionBase +from numpy import log, pi, random +from torch import float, long, tensor + + +class BoTorchAcquisitionBase(AcquisitionBase): + """Mimics the AcquisitionBase in ELFI, but botorch-compatible. + """ + + def __init__( + self, + model, + prior=None, + n_inits=10, + max_opt_iters=1000, + noise_var=None, + exploration_rate=10, + seed=None, + constraints=None, + ): + """ + :param model: + An instance of ``GPyTorchRegression``. Needs to have the + following attributes: `input_dim`, `bounds`, + `parameter_names`, and a callable `predict(x, noiseless)`. 
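+        :param prior:
+            Not supported yet; must be left as ``None``.
+        :param exploration_rate:
+            Exploration rate of the LCB acquisition rule.
+        :param seed:
+            Seed for the NumPy random state used in acquisitions.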
+ """ + self.model = model + if prior is not None: + raise NotImplementedError( + "Priors for acquisition function not implemented." + ) + else: + self.prior = prior + self.n_inits = int(n_inits) + self.max_opt_iters = int(max_opt_iters) + self.constraints = constraints + if noise_var is not None: + self._check_noise_var(noise_var) + self.noise_var = self._transform_noise_var(noise_var) + else: + self.noise_var = noise_var + self.exploration_rate = exploration_rate + self.random_state = ( + random if seed is None else random.RandomState(seed) + ) + self.seed = 0 if seed is None else seed + + def acquire(self, n, t=None): + raise NotImplementedError + + +class BoTorchLCBSC(BoTorchAcquisitionBase): + def _beta(self, t): + # Count from 0. + if t is None: + t = 0 + t += 1 + d = self.model.input_dim + return 2 * log(t**(2 * d + 2) * pi**2 / (3 / self.exploration_rate)) + + def evaluate(self, x, t=None): + """ + Evaluate the acquisition function at `x`. + + :param x: + The test variable to evaluate at. + :param t: + The index of the current iteration, starting from 0. + :returns: + the lower confindence bound at `x`. + """ + lcb = UpperConfidenceBound( + self.model._gp, + beta=self._beta(t), + maximize=False + ) + return -lcb(x) + + def acquire(self, n, t=None, std_scale=None): + """ + Minimize the acquisition function. + + :param n: + Number of acquisition points to return. + :param t: + The index of the current iteration, starting from 0. + :param std_scale: + If left at None, logarithmically scales up the exploration. + Else gives a fixed numerical value for the prefactor of the + standard deviation in ``mu - sqrt(prefactor) * std``. + :returns: + np.ndarray; the shape is ``(n, input_dim)``. + """ + lcb = UpperConfidenceBound( + self.model._gp, + beta=std_scale or self._beta(t), + maximize=False, + ) + bounds = self.model.torch_bounds + x_init = gen_batch_initial_conditions( + lcb, + bounds, + q=n, + num_restarts=25, + raw_samples=500 * 2**self.model.input_dim, + inequality_constraints=[ + (tensor([i], dtype=long), + tensor([1], dtype=float), + b) + for i, b in enumerate(bounds[0]) + ] + [ + (tensor([i], dtype=long), + tensor([-1], dtype=float), + -b) + for i, b in enumerate(bounds[1]) + ] + ) + # While gen_candidates_scipy is exact, the noise is actually useful. 
+ batch_candidates, batch_acq_values = gen_candidates_torch( + initial_conditions=x_init, + acquisition_function=lcb, + lower_bounds=bounds[0], + upper_bounds=bounds[1], + ) + candidate = get_best_candidates(batch_candidates, -batch_acq_values) + return candidate.detach().numpy() diff --git a/elfi/methods/bo/gpytorch_bolfi_model.py b/elfi/methods/bo/gpytorch_bolfi_model.py new file mode 100644 index 00000000..cfe23708 --- /dev/null +++ b/elfi/methods/bo/gpytorch_bolfi_model.py @@ -0,0 +1,460 @@ +# SPDX-FileCopyrightText: 2024 Yannick Kuhn +# +# SPDX-License-Identifier: BSD-3-Clause + +from botorch.models.gp_regression import SingleTaskGP +import gpytorch +from gpytorch.constraints import Positive +from gpytorch.kernels import RBFKernel, ScaleKernel +from gpytorch.means.mean import Mean +from gpytorch.priors import GammaPrior, NormalPrior +import numpy as np +from scipy.optimize import minimize +import torch + + +class ParabolicMean(Mean): + """ + A parabolic Prior mean function, i.e.: + + µ(x) = a ⋅ x² + b ⋅ x + c + + or more generally for multidimensional inputs: + + µ(x) = Σⱼ (aⱼ ⋅ xⱼ² + bⱼ ⋅ xⱼ + c) + """ + + def __init__( + self, + list_of_square_priors, + list_of_linear_priors, + constant_prior, + batch_shape=torch.Size(), + **kwargs, + ): + super().__init__() + + self.dim = len(list_of_square_priors) + self.batch_shape = batch_shape + + for j, square_prior in enumerate(list_of_square_priors): + self.register_parameter( + name='raw_square_coefficient_' + str(j), + parameter=torch.nn.Parameter( + square_prior.mean * torch.ones(batch_shape) + ) + ) + self.register_prior( + 'square_coefficient_' + str(j) + '_prior', + square_prior, + lambda module, index=j: module.square_coefficients[index], + lambda module, value, index=j: ( + module._square_closure(value, index) + ), + ) + self.register_constraint( + 'raw_square_coefficient_' + str(j), Positive() + ) + + for j, linear_prior in enumerate(list_of_linear_priors): + self.register_parameter( + name='raw_linear_coefficient_' + str(j), + parameter=torch.nn.Parameter( + linear_prior.mean * torch.ones(batch_shape) + ) + ) + self.register_prior( + 'linear_coefficient_' + str(j) + '_prior', + linear_prior, + lambda module, index=j: module.linear_coefficients[index], + lambda module, value, index=j: ( + module._linear_closure(value, index) + ), + ) + + self.register_parameter( + name='raw_constant', + parameter=torch.nn.Parameter( + constant_prior.mean * torch.ones(batch_shape) + ) + ) + self.register_prior( + 'constant_prior', + constant_prior, + lambda module: module.constant, + lambda module, value: module._constant_closure(value) + ) + + @property + def square_coefficients(self): + return torch.stack([ + getattr( + self, 'raw_square_coefficient_' + str(index) + '_constraint' + ).transform( + getattr(self, 'raw_square_coefficient_' + str(index)) + ) + for index in range(self.dim) + ], dim=0) + + @square_coefficients.setter + def square_coefficients(self, value): + # When value is e.g. a NumPy array, convert to a Torch tensor + # and copy dtype and device (CPU/GPU) from raw parameters. 
+ if not torch.is_tensor(value): + value = torch.as_tensor(value).to(self.square_coefficients) + + for index in range(self.dim): + self._square_closure(self, value, index) + + def _square_closure(self, value, index): + self.initialize( + **{ + 'raw_square_coefficient_' + str(index): ( + getattr( + self, + 'raw_square_coefficient_' + str(index) + '_constraint' + ).inverse_transform(value) + ) + } + ) + + @property + def linear_coefficients(self): + return torch.stack([ + getattr(self, 'raw_linear_coefficient_' + str(index)) + for index in range(self.dim) + ], dim=0) + + @linear_coefficients.setter + def linear_coefficients(self, value): + if not torch.is_tensor(value): + value = torch.as_tensor(value).to(self.linear_coefficients) + + for index in range(self.dim): + self._linear_closure(self, value, index) + + def _linear_closure(self, value, index): + self.initialize( + **{'raw_linear_coefficient_' + str(index): value} + ) + + @property + def constant(self): + return self.raw_constant + + @constant.setter + def constant(self, value): + self._constant_closure(self, value) + + def _constant_closure(self, value): + if not torch.is_tensor(value): + value = torch.as_tensor(value).to(self.constant) + + self.initialize(raw_constant=value) + + def forward(self, x): + # The unsqueeze makes these have shape batch_shape x 1. + square_coefficients = self.square_coefficients.unsqueeze(-1) + linear_coefficients = self.linear_coefficients.unsqueeze(-1) + constant = self.constant.unsqueeze(-1) + + square_term = torch.sum(square_coefficients.T * x**2, dim=-1) + linear_term = torch.sum(linear_coefficients.T * x, dim=-1) + + return square_term + linear_term + constant + + +def BOLFIKernel(num_dims, length_scale, kernel_var): + # In original BOLFI, a variance term ("bias") would be added. + return ScaleKernel( + # ProductStructureKernel( + RBFKernel( + lengthscale_prior=GammaPrior(length_scale, 1), + ), + # num_dims=num_dims + # ), + outputscale_prior=GammaPrior(kernel_var, 1)) + + +class BOLFIKernel_manually_implemented(gpytorch.kernels.Kernel): + """ + Diagonal RBF kernel with importance weighting, i.e.: + + v(x, y) = σ_f² ⋅ exp((x - y)² / λ²) + + or more generally for multidimensional inputs: + + v(x, y) = σ_f² ⋅ exp(Σⱼ (xⱼ - yⱼ)² / λⱼ²) + + `list_of_lengthscale_priors` refers to the λⱼ. + `multiplicative_variance` refers to σ_f². + """ + + is_stationary = True + + def __init__( + self, + list_of_lengthscale_priors, + multiplicative_variance_prior, + batch_shape=torch.Size(), + **kwargs + ): + super().__init__(**kwargs) + + self.dim = len(list_of_lengthscale_priors) + + for j, lengthscale_prior in enumerate(list_of_lengthscale_priors): + # Register the raw hyperparameter (i.e. without constraints) + # with the inherited helper method register_parameter. + self.register_parameter( + name='raw_lengthscale_coefficient_' + str(j), + parameter=torch.nn.Parameter( + torch.zeros(self.batch_shape) + ) + ) + + # Register the constraint with the inherited helper method + # register_constraint. + self.register_constraint( + 'raw_lengthscale_coefficient_' + str(j), Positive() + ) + + # Set the parameter Prior with the inherited helper method + # register_prior. + # The first argument of the lambdas is an instance of this + # class, so treat it as self. The first lambda calls the + # method of this class that defines the parameter; its + # result will be input in the underlying probability + # distribution, initialized by the second argument. 
+ # The second lambda additionally takes a tensor in + # (transformed) parameter space and initializes the internal + # parameter representation to the proper value by applying + # the inverse transform. + # This enables sampling parameter values from Priors. + if lengthscale_prior is not None: + self.register_prior( + 'lengthscale_coefficient_' + str(j) + '_prior', + lengthscale_prior, + lambda module, index=j: ( + module.lengthscale_coefficients[index] + ), + lambda module, value, index=j: ( + module._lengthscale_closure(value, index) + ), + ) + + self.register_parameter( + name='raw_multiplicative_variance', + parameter=torch.nn.Parameter(torch.zeros(batch_shape)) + ) + self.register_constraint('raw_multiplicative_variance', Positive()) + self.register_prior( + 'multiplicative_variance_prior', + multiplicative_variance_prior, + lambda module: module.multiplicative_variance, + lambda module, value: ( + module._multiplicative_variance_closure(value) + ) + ) + self.register_parameter( + name='raw_multiplicative_variance', + parameter=torch.nn.Parameter(torch.zeros(batch_shape)) + ) + self.register_constraint('raw_multiplicative_variance', Positive()) + self.register_prior( + 'multiplicative_variance_prior', + multiplicative_variance_prior, + lambda module: module.multiplicative_variance, + lambda module, value: ( + module._multiplicative_variance_closure(value) + ) + ) + + @property + def lengthscale_coefficients(self): + """Actual definition of the length scale as such.""" + # The attributes stem from the register_* helper methods. + # Any constraints are applied here. + return torch.stack([ + getattr( + self, + 'raw_lengthscale_coefficient_' + str(index) + '_constraint' + ).transform( + getattr(self, 'raw_lengthscale_coefficient_' + str(index)) + ) + for index in range(self.dim) + ], dim=0) + + @lengthscale_coefficients.setter + def lengthscale(self, value): + if not torch.is_tensor(value): + value = torch.as_tensor(value).to(self.square_coefficients) + + for index in range(self.dim): + self._lengthscale_closure(self, value, index) + + def _lengthscale_closure(self, value, index): + # When setting the paramater, transform the actual value to a raw one + # by applying the inverse transform. initialize is inherited. + self.initialize( + **{ + 'raw_lengthscale_coefficient_' + str(index): ( + getattr( + self, + 'raw_lengthscale_coefficient_' + str(index) + + '_constraint' + ).inverse_transform(value[index]) + ) + } + ) + + @property + def multiplicative_variance(self): + return self.raw_multiplicative_variance_constraint.transform( + self.raw_multiplicative_variance + ) + + @multiplicative_variance.setter + def multiplicative_variance(self, value): + self._multiplicative_variance_closure(self, value) + + def _multiplicative_variance_closure(self, value): + if not torch.is_tensor(value): + value = torch.as_tensor(value).to(self.multiplicative_variance) + + self.initialize(raw_multiplicative_variance=value) + + def forward(self, x1, x2, **params): + """Kernel function.""" + # Read: x_ = x / self.lengthscale. + x1_ = x1.div(self.lengthscale) + x2_ = x2.div(self.lengthscale) + + # covar_dist is an inherited helper method for computing the + # Euclidean distance between all pairs of points in x1 and x2. 
+ # diff_matrix = self.covar_dist(x1_, x2_, **params) + diff_squared = torch.sum((x1_ - x2_)**2, dim=1) + + return self.multiplicative_variance * torch.exp(diff_squared) + + +class BOLFIModel(SingleTaskGP): + + def __init__( + self, + train_x, + train_y, + likelihood=gpytorch.likelihoods.GaussianLikelihood( + noise_constraint=gpytorch.constraints.GreaterThan(0) + ), + bounds=[] + ): + if bounds == []: + raise NotImplementedError( + "Default bounds not implemented yet, please provide them." + ) + + self.bounds = torch.tensor(bounds) + + self.dim = len(train_x[0]) + + # Heuristics to choose kernel parameters based on the initial + # data. + length_scale = (torch.max(self.bounds) - torch.min(self.bounds)) / 3 + kernel_var = (torch.max(train_y) / 3)**2 + # This refers to the fallback in BOLFI when no mean function is + # specified. Instead, a bias term gets added to the kernel. + # The bias term used has a Gamma distribution with expected + # value and variance both equal to bias_var. + # bias_var = kernel_var / 4 + """ + # In BOLFI, all Hyperpriors in the kernel are Gamma + # distributions. The lengthscale Priors are set to have expected + # value and variance + # both equal to length_scale. The multiplicate variance is set + # to have expected value and variance both equal to kernel_var. + self.covar_module = BOLFIKernel_manually_implemented( + list_of_lengthscale_priors=[ + GammaPrior(length_scale, 1) for _ in range(self.dim) + ], + multiplicative_variance_prior=GammaPrior(kernel_var, 1), + # bias_variance_prior=GammaPrior(bias_var, 1), + # The first index in the shape is the "length" of the batch, + # while the second is always 1 due to technical reasons. + batch_shape=torch.Size([1, 1]) + ) + """ + covar_module = BOLFIKernel(self.dim, length_scale, kernel_var) + + # Use the initial parabolic fit to the initial training data + # as means of the hyperparameters. + def parabolic_fit_function(x, args): + a = np.array(args[:self.dim]) + b = np.array(args[self.dim:-1]) + c = args[-1] + return np.sum(a * x**2 + b * x, axis=1) + c + + np_x = np.array(train_x.cpu()) + # Since BoTorch requires an explicit output dimension, but it is + # assumed to be 1 here, re-squeeze the training tensor. + np_y = np.array(train_y.squeeze().cpu()) + + if len(train_x) == 1: + np_y = np.atleast_1d(np_y) + a = np.zeros(self.dim) + b = np.zeros(self.dim) + c = np_y[0] + elif len(train_x) == 2: + a = np.zeros(self.dim) + b = (np_y[1] - np_y[0]) / (np_x[1] - np_x[0]) + c = np_y[0] - np.sum(b * np_x[0]) + else: + parabolic_fit = minimize( + lambda *args: np.sum( + (parabolic_fit_function(np_x, *args) - np_y)**2 + )**0.5, + x0=np.append(np.ones(self.dim), np.zeros(self.dim + 1)), + method='trust-constr' + ).x + a = np.array(parabolic_fit[:self.dim]) + b = np.array(parabolic_fit[self.dim:-1]) + c = parabolic_fit[-1] + + # Heuristics for the variances of the parabolic hyperparameters. + # a: check the "squaredness" of (train_y - b ⋅ train_x - c) / a. + if len(train_x) > 2: + a_var = np.atleast_1d(np.sum( + (np.sqrt(np.abs( + (np_y[:, None] - b * np_x - c) / a + )) - np_x)**2, axis=0 + )) # **0.5 would be standard deviation + else: + a_var = np.ones(self.dim) + # b: check the deviation of parabola minimum and data minimum + # location. + b_var = np.atleast_1d((-b - 2 * a * np_x[np.argmin(np_y)])**2) + # c: check the deviation of parabola minimum and data minimum. 
+ c_var = (c - np.min(np_y))**2 + + mean_module = ParabolicMean( + [NormalPrior(a_j, a_j_var) for a_j, a_j_var in zip(a, a_var)], + [NormalPrior(b_j, b_j_var) for b_j, b_j_var in zip(b, b_var)], + NormalPrior(c, c_var), + # The first index in the shape is the "length" of the batch, + # while the second is always 1 due to technical reasons. + # batch_shape=torch.Size([1, 1]) + ) + + super().__init__( + train_X=train_x, + train_Y=train_y, + likelihood=likelihood, + covar_module=covar_module, + mean_module=mean_module + ) + + def forward(self, x): + mean_x = self.mean_module(x) + covar_x = self.covar_module(x) + + return gpytorch.distributions.MultivariateNormal(mean_x, covar_x) diff --git a/elfi/methods/bo/gpytorch_regression.py b/elfi/methods/bo/gpytorch_regression.py new file mode 100644 index 00000000..6edcd913 --- /dev/null +++ b/elfi/methods/bo/gpytorch_regression.py @@ -0,0 +1,404 @@ +# SPDX-FileCopyrightText: 2024 Yannick Kuhn +# +# SPDX-License-Identifier: BSD-3-Clause + +# This module contains an interface for using GPyTorch in ELFI. + +import copy +import gpytorch +import torch +import warnings + +from elfi.methods.bo.gpytorch_bolfi_model import BOLFIModel + + +class GPyTorchRegression: + """Gaussian Process regression using the GPyTorch library.""" + + def __init__( + self, + parameter_names=None, + bounds=None, + optimizer=None, + max_opt_iters=50, + gp=None, + initial_evidence=None, + **gp_params + ): + """ + Initialize GPyTorchRegression. + + :param parameter_names: list of str, optional + Names of parameter nodes. If None, sets dimension to 1. + :param bounds: dict, optional + The region where to estimate the posterior for each + parameter in ``model.parameters``. + ``{'parameter_name':(lower, upper), ... }`` + If not supplied, defaults to (0, 1) bounds for all + dimensions. + :param optimizer: string, optional + Optimizer for the GP hyper parameters. + :param max_opt_iters: int, optional + :param gp: gpytorch.models.ExactGP, optional + """ + + if isinstance(initial_evidence, dict): + raise ValueError( + "Batch dict for initial evidence not implemented in this " + "re-implementation of BOLFI. Please implement it like in ELFI." + ) + + if parameter_names is None: + input_dim = 1 + elif isinstance(parameter_names, (list, tuple)): + input_dim = len(parameter_names) + else: + raise ValueError( + "Keyword `parameter_names` must be a list of strings." + ) + + if bounds is None: + warnings.warn( + "Parameter bounds not specified. Using [0,1] for each " + "parameter." + ) + bounds = [(0, 1)] * input_dim + elif len(bounds) != input_dim: + raise ValueError( + ( + "Length of `bounds` ({}) does not match the length of " + + "`parameter_names` ({})." + ).format(len(bounds), input_dim) + ) + elif isinstance(bounds, dict): + if len(bounds) == 1: # might be parameter_names==None + bounds = [bounds[n] for n in bounds.keys()] + else: + # turn bounds dict into a list in the same order as + # parameter_names + bounds = [bounds[n] for n in parameter_names] + else: + raise ValueError( + "Keyword `bounds` must be a dictionary " + "`{'parameter_name': (lower, upper), ... }`" + ) + + # Make a 2 x d tensor of bounds for PyTorch and BoTorch. 
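+        # Row 0 holds the lower bounds and row 1 the upper bounds,
+        # which is the layout BoTorch expects for its `bounds` input.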
+ torch_bounds = torch.tensor(bounds).T + + self.parameter_names = parameter_names + self.bounds = bounds + self.torch_bounds = torch_bounds + self.optimizer = optimizer + self.max_opt_iters = max_opt_iters + self._gp = gp + self.gp_params = gp_params + + self.input_dim = input_dim + self.is_sampling = False # set to True once in sampling phase + + self.initial_evidence = initial_evidence + self.initial_X = None + self.initial_Y = None + + self.first_update = False + + # def __str__(self): + # """Return __str__ of underlying Gaussian Process.""" + # return self._gp.__str__() + + # def __repr__(self): + # """Return __repr__ of underlying Gaussian Process.""" + # return self._gp.__repr__() + + def predict(self, x, noiseless=True, numpy_output=True): + """ + Return the GP model posterior mean and variance at x. + + Fast variance inference is made with LOVE via fast_pred_var(). + For accurate variance inference, you can just comment out the + part. + + :param x: np.array + NumPy compatible (n, input_dim) array of points to evaluate + if ``len(x.shape) == 1`` will be cast to 2D with + ``x[None, :]``. + :param noiseless: bool + Whether to include the noise variance or not to the returned + variance. + :param numpy_output: bool + Compatibility with ELFI built-in methods. Set to False to + retain PyTorch Tensor output. + + :returns: + Returns as expected by ELFI: + - pred.mean; torch.tensor, the predictive mean + - pred.variance; torch.tensor, the predictive variance + Shape: + GP posterior (mean, var) at x where + mean : np.array + with shape ``(x.shape[0], 1)`` + var : np.array + with shape ``(x.shape[0], 1)`` + """ + + # Make sure that the GP is in "computing predictions" mode. + self._gp.eval() + self._gp.likelihood.eval() + + x = torch.Tensor(x) + + if not noiseless: + raise NotImplementedError( + "GP prediction with additional diagonal kernel noise not " + "implemented." + ) + + try: + with gpytorch.settings.fast_pred_var(): + pred = self._gp.likelihood(self._gp(x)) + except RuntimeError: + warnings.warn("Cholesky failed. Adding more jitter...") + with ( + gpytorch.settings.cholesky_jitter(float=1e-2) + ): + pred = self._gp.likelihood(self._gp(x)) + + if numpy_output: + return pred.mean.detach().numpy(), pred.variance.detach().numpy() + else: + return pred.mean, pred.variance + + def predict_mean(self, x, numpy_output=True): + """ + Return the GP model posterior mean function at x. + + :param numpy_output: bool + Compatibility with ELFI built-in methods. Set to False to + retain PyTorch Tensor output. + :returns: + np.array with shape ``(x.shape[0], 1)``. + """ + + return self.predict(x, numpy_output=numpy_output)[0] + + def predictive_gradients(self, x, numpy_output=True): + """ + Return the gradients of the GP model posterior mean and variance. + + Given a set of points at which to predict X* (size [N*,Q]), + compute the derivatives of the mean and variance. + Resulting arrays are sized: + dmu_dX* -- [N*, Q ,D], where D is the number of output in + this GP (usually one). + + Note that this is not the same as computing the mean and + variance of the derivative of the function! + dv_dX* -- [N*, Q], since all outputs have same variance. + + :rtype: [np.ndarray (N*, Q ,D), np.ndarray (N*,Q) ] + + :param x: np.array + NumPy compatible (n, input_dim) array of points to evaluate + if ``len(x.shape) == 1`` will be cast to 2D with + ``x[None, :]``. + :param numpy_output: bool + Compatibility with ELFI built-in methods. Set to False to + retain PyTorch Tensor output. 
+ :returns: + tuple + GP (grad_mean, grad_var) at x where + grad_mean : np.array + with shape ``(x.shape[0], input_dim)`` + grad_var : np.array + with shape ``(x.shape[0], input_dim)`` + """ + + self._gp.eval() + self._gp.likelihood.eval() + + def pred_evaluation(X_eval): + try: + with gpytorch.settings.fast_pred_var(): + pred = self._gp.likelihood(self._gp(X_eval)) + except RuntimeError: + warnings.warn("Cholesky failed. Adding more jitter...") + with gpytorch.settings.cholesky.jitter(float=1e-2): + pred = self._gp.likelihood(self._gp(X_eval)) + return pred + + # Note how the gradient is stored in X_mean rather than the + # function. + X_mean = torch.autograd.Variable(torch.Tensor(x), requires_grad=True) + pred_for_mean = pred_evaluation(X_mean) + pred_mean_grad_term = pred_for_mean.mean.sum() + pred_mean_grad_term.backward() + pred_mean_grad = X_mean.grad + + X_var = torch.autograd.Variable(torch.Tensor(x), requires_grad=True) + pred_for_var = pred_evaluation(X_var) + pred_var_grad_term = pred_for_var.variance.sum() + pred_var_grad_term.backward() + pred_var_grad = X_var.grad + + if numpy_output: + return ( + pred_mean_grad.detach().numpy(), pred_var_grad.detach().numpy() + ) + else: + return pred_mean_grad, pred_var_grad + + def predictive_gradient_mean(self, x, numpy_output=True): + """ + Return the gradient of the GP model posterior mean at x. + + + :param x: np.array + NumPy compatible (n, input_dim) array of points to evaluate + if ``len(x.shape) == 1`` will be cast to 2D with + ``x[None, :]``. + :param numpy_output: bool + Compatibility with ELFI built-in methods. Set to False to + retain PyTorch Tensor output. + :returns: + np.array with shape ``(x.shape[0], input_dim)``. + """ + + return self.predictive_gradients(x, numpy_output=numpy_output)[0] + + def update(self, x, y, optimize=False): + """ + Update the GP model with new data. + + :param inputs: + A torch.Tensor of the shape + "b1 x ... x bk x m x d" + or + "f x b1 x ... x bk x m x d". + Locations of (fantasy) observations. + :param targets: + A torch:Tensor of the shape + "b1 x ... x bk x m" + or + "f x b1 x ... x bk x m". + Labels of (fantasy) observations. + :param optimize: + Whether or not to optimize hyperparameters. + :returns: + An ``ExactGP`` model with ``n + m`` training examples, where + the ``m`` fantasy examples have been added and all test-time + caches have been updated. + """ + + x = torch.tensor(x, dtype=torch.double) + y = torch.tensor(y, dtype=torch.double).unsqueeze(-1) + initial_optimize = self.first_update + + if self.initial_X is None: + self.initial_X = x + self.initial_Y = y + self._gp = BOLFIModel( + self.initial_X, self.initial_Y, bounds=self.bounds + ) + self._gp.set_train_data(self.X, self.Y, strict=False) + elif self.initial_evidence > len(self.initial_X): + self.initial_X = torch.cat((self.initial_X, x)) + self.initial_Y = torch.cat((self.initial_Y, y)) + self._gp = BOLFIModel( + self.initial_X, self.initial_Y, bounds=self.bounds + ) + self._gp.set_train_data(self.X, self.Y, strict=False) + # Use two passes (the optimizer gets stored between samples. + if len(self.initial_X) >= self.initial_evidence - 2: + optimize = True + else: + # "get_fantasy_model" updates the GP, but discards previous + # hyperparameter training, as the model gets replaced and + # hence the links get broken. 
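+            # set_train_data below keeps the existing model object
+            # (and its trained hyperparameters) and only swaps in the
+            # enlarged training tensors.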
+ # self._gp = self._gp.get_fantasy_model(x[0], y[0]) + self._gp.set_train_data( + torch.concatenate((self.X, x)), + torch.concatenate((self.Y, y[0])), + strict=False, + ) + + if optimize or initial_optimize: + self.optimize() + self.first_update = False + + def optimize(self): + """Optimize GP hyperparameters. Use Adam by default.""" + self._gp.train() + self._gp.likelihood.train() + optimizer = self.optimizer if self.optimizer else torch.optim.Adam( + self._gp.parameters(), lr=0.1 + ) # Includes GaussianLikelihood parameters + # One possible loss function for GPs: marginal log likehood. + mll = gpytorch.mlls.ExactMarginalLogLikelihood( + self._gp.likelihood, self._gp + ) + for i in range(self.max_opt_iters): + # Delete gradients from previous iteration by overwriting with 0. + optimizer.zero_grad() + # Output from model. + output = self._gp(self.X) + # Calculate loss and backpropagation gradients. + loss = -mll(output, self.Y) + loss.backward() + # print( + # 'Iter %d/%d - Loss: %.3f' % ( + # i + 1, self.max_opt_iters, loss.item(), + # ) + # ) + optimizer.step() + + @property + def n_evidence(self): + """Return the number of observed samples.""" + if self._gp is None: + return 0 + return len(self._gp.train_inputs[0]) + + @property + def X(self): + """Return input evidence.""" + return self._gp.train_inputs[0] + + @property + def Y(self): + """Return output evidence.""" + return self._gp.train_targets + + @property + def noise(self): + """Return the noise.""" + # In GPy: + # return self._gp.Gaussian_noise.variance[0] + raise NotImplementedError( + "No additional noise to GP implemented." + ) + + @property + def instance(self): + """Return the gp instance.""" + return self._gp + + def copy(self): + """Return a copy of current instance.""" + kopy = copy.copy(self) + if self._gp: + kopy._gp = self._gp.copy() + + # if 'kernel' in self.gp_params: + # kopy.gp_params['kernel'] = self.gp_params['kernel'].copy() + + # if 'mean_function' in self.gp_params: + # kopy.gp_params['mean_function'] = ( + # self.gp_params['mean_function'].copy() + # ) + + return kopy + + def __copy__(self): + """Return a copy of current instance.""" + return self.copy() diff --git a/elfi/methods/inference/gpytorch_bolfi.py b/elfi/methods/inference/gpytorch_bolfi.py new file mode 100644 index 00000000..d7991152 --- /dev/null +++ b/elfi/methods/inference/gpytorch_bolfi.py @@ -0,0 +1,601 @@ +# SPDX-FileCopyrightText: 2024 Yannick Kuhn +# +# SPDX-License-Identifier: BSD-3-Clause + +from botorch.acquisition.analytic import UpperConfidenceBound +from botorch.generation.gen import gen_candidates_scipy, get_best_candidates +from botorch.optim.initializers import gen_batch_initial_conditions +from elfi.loader import get_sub_seed +from elfi.methods.inference.bolfi import BayesianOptimization, BOLFI +import elfi.methods.mcmc as mcmc +from elfi.methods.posteriors import BolfiPosterior +from elfi.methods.results import BolfiSample, OptimizationResult +from elfi.methods.utils import arr2d_to_batch # , resolve_sigmas +from elfi.model.elfi_model import NodeReference +from elfi.model.extensions import ModelPrior +import gpytorch +import numpy as np +import pyro +from pyro.infer.mcmc import NUTS, MCMC +import torch + +from elfi.methods.bo.botorch_acquisition import BoTorchLCBSC +from elfi.methods.bo.gpytorch_regression import GPyTorchRegression + + +class GPyTorchBayesianOptimization(BayesianOptimization): + """Bayesian Optimization of an unknown target function.""" + + def __init__( + self, + model, + target_name=None, + bounds=None, + 
initial_evidence=None, + update_interval=10, + target_model=None, + acquisition_method=None, + acq_noise_var=0, + exploration_rate=10, + batch_size=1, + batches_per_acquisition=None, + async_acq=False, + pyro_torch_prior=None, + **kwargs + ): + """ + Initialize Bayesian optimization. + + :param model: ElfiModel, NodeReference + :param target_name: str, NodeReference.+ + Only needed if model is an ElfiModel + :param bounds : dict, optional + The region where to estimate the posterior for each + parameter in ``model.parameters``: + ``dict('parameter_name':(lower, upper), ... )``. + Not used if custom `target_model` is given. + :param initial_evidence: int, dict, optional + Number of initial evidence or a precomputed batch dict + containing parameter and discrepancy values. Default value + depends on the dimensionality. + :param update_interval: int, optional + How often to update the GP hyperparameters of the + `target_model`. + :param target_model: GPyRegression, optional + :param acquisition_method: AnalyticAcquisitionFunction, optional + Method of acquiring evidence points. Defaults to LCBSC. + :param acq_noise_var: float, dict, optional + Variance(s) of the noise added in the default LCBSC + acquisition method. If a dictionary, values should be float + specifying the variance for each dimension. + :param exploration_rate: float, optional + Exploration rate of the acquisition method + :param batch_size: int, optional + Elfi batch size. Defaults to 1. + :param batches_per_acquisition: int, optional + How many batches will be requested from the acquisition + function at one go. Defaults to `max_parallel_batches`. + :param async_acq: bool, optional + Allow acquisitions to be made asynchronously, i.e., do not + wait for all the results from the previous acquisition + before making the next. This can be more efficient with a + large amount of workers (e.g. in cluster environments) but + foregoes the guarantee for the exactly same result with the + same initial conditions (e.g. the seed). Default: False. + :param pyro_torch_prior: torch.distributions, optional + (Temporarily) needed as a by-pass of the internal processing + of the elfi.Prior, if the trained model has to be sampled. + Provide as the mathematically same object as the elfi.Prior. + If a series of elfi.Priors was given for a multivariate + distribution, give this as a multivariate distribution. + :param **kwargs: + """ + self.pyro_torch_prior = pyro_torch_prior + target_model = target_model or GPyTorchRegression( + ( + model.model.parameter_names + if isinstance(model, NodeReference) else + model.parameter_names + ), + bounds=bounds, + initial_evidence=initial_evidence, + **kwargs + ) + super(GPyTorchBayesianOptimization, self).__init__( + model, + target_name=target_name, + bounds=bounds, + initial_evidence=initial_evidence, + update_interval=update_interval, + target_model=target_model, + acquisition_method=acquisition_method, + acq_noise_var=acq_noise_var, + exploration_rate=exploration_rate, + batch_size=batch_size, + batches_per_acquisition=batches_per_acquisition, + async_acq=async_acq, + # **kwargs + ) + if acquisition_method is None: + self.acquisition_method = BoTorchLCBSC( + self.target_model, + noise_var=acq_noise_var, + exploration_rate=exploration_rate, + seed=self.seed + ) + + def extract_result(self): + """ + Extract the result from the current state. + Overrides the SciPy-based method in ELFI. + + :returns: + OptimizationResult + """ + # SciPy version for GPy. 
+ # x_min, _ = stochastic_optimization( + # self.target_model.predict_mean, + # self.target_model.bounds, seed=self.seed + # ) + bounds = self.target_model.torch_bounds + # Re-use the Upper Confidence Bound as Mean with beta = 0. + # Reminder: ucb = mean + sqrt(beta * variance). With + # maximize = False, it is ucb = -mean + sqrt(beta * variance). + mean = UpperConfidenceBound( + self.target_model._gp, beta=0, maximize=False + ) + x_init = gen_batch_initial_conditions( + mean, + bounds, + q=1, + num_restarts=25, + raw_samples=500 * 2**self.target_model.input_dim + ) + # gen_candidates_scipy gives the minimum without noise. + x_min, _ = gen_candidates_scipy( + initial_conditions=x_init, + acquisition_function=mean, + lower_bounds=bounds[0], + upper_bounds=bounds[1], + ) + x_min = x_min[0].T.detach().numpy() + + batch_min = arr2d_to_batch( + x_min, self.target_model.parameter_names + ) + outputs = arr2d_to_batch( + self.target_model.X.detach().numpy(), + self.target_model.parameter_names + ) + + # batch_min = arr2d_to_batch(x_min, self.parameter_names) + # outputs = arr2d_to_batch( + # self.target_model.X, self.parameter_names + # ) + outputs[self.target_name] = self.target_model.Y[0] + + return OptimizationResult( + x_min=batch_min, + outputs=outputs, + **self._extract_result_kwargs() + ) + + +# The order of inheritance is the order as it is written here. +# Hence, GPyTorchBayesianOptimization.__init__ gets inherited, +# not BOLFI.__init__, which was inherited from BayesianOptimization. +class GPyTorchBOLFI(GPyTorchBayesianOptimization, BOLFI): + """ + Bayesian Optimization for Likelihood-Free Inference (BOLFI). + + Approximates the discrepancy function by a stochastic regression + model. Discrepancy model is fit by sampling the discrepancy function + at points decided by the acquisition function. + The method implements the framework introduced in + Gutmann & Corander, 2016. + + References + ---------- + Gutmann M U, Corander J (2016). Bayesian Optimization for Likelihood + -Free Inference of Simulator-Based Statistical Models. + JMLR 17(125):1−47, 2016. + http://jmlr.org/papers/v17/15-017.html + """ + + def extract_posterior(self, threshold=None): + """ + Return an object representing the approximate posterior. + + The approximation is based on surrogate model regression. + + :param threshold: float, optional + Discrepancy threshold for creating the posterior (log with + log discrepancy). + + :returns: + elfi.methods.posteriors.BolfiPosterior + """ + if self.state['n_evidence'] == 0: + raise ValueError( + 'Model is not fitted yet, please see the `fit` method.') + + # Original ELFI implementation. + # prior = ModelPrior( + # self.model, + # parameter_names=self.target_model.parameter_names + # ) + # if self.pyro_torch_prior is None: + # print( + # "Warning: Translation from elfi.model.extensions." + # "ModelPrior into a PyTorch-compatible distribution " + # "not implemented." + # ) + # prior = self.pyro_torch_prior + prior = ModelPrior( + self.model, + parameter_names=self.target_model.parameter_names + ) + if threshold is None: + bolfi_model = self.target_model._gp + bounds = self.target_model.torch_bounds + mean = UpperConfidenceBound( + bolfi_model, beta=0, maximize=False + ) + x_init = gen_batch_initial_conditions( + mean, + bounds, + q=1, + num_restarts=25, + raw_samples=500 * 2**self.target_model.input_dim + ) + # gen_candidates_scipy gives the minimum without noise. 
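+            # The GP prediction at this minimizer is used as the
+            # discrepancy threshold of the returned BolfiPosterior.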
+ batch_candidates, batch_acq_values = gen_candidates_scipy( + initial_conditions=x_init, + acquisition_function=mean, + lower_bounds=bounds[0], + upper_bounds=bounds[1], + ) + candidate = get_best_candidates( + batch_candidates, -batch_acq_values + ) + with gpytorch.settings.fast_computations(False, False, False): + threshold = bolfi_model.likelihood( + bolfi_model(candidate) + ).mean.cpu().detach().numpy() + return BolfiPosterior( + self.target_model, threshold=threshold, prior=prior + ) + + def sample( + self, + n_samples, + warmup=None, + n_chains=4, + threshold=None, + initials=None, + algorithm='nuts', + n_evidence=None, + **kwargs + ): + r""" + Sample the posterior distribution of BOLFI. + + Here the likelihood is defined through the cumulative density + function of the standard normal distribution: + L(\theta) \propto F((h-\mu(\theta)) / \sigma(\theta)) + where h is the threshold, and \mu(\theta) and \sigma(\theta) are + the posterior mean and (noisy) standard deviation of the + associated Gaussian process. The sampling is performed with an + MCMC sampler (the No-U-Turn Sampler, NUTS). + + :param n_samples: int + Number of requested samples from the posterior for each + chain. This includes warmup, and note that the effective + sample size is usually considerably smaller. + :param warmup: int, optional + Length of warmup sequence in MCMC sampling. Defaults to + ``n_samples//2``. + :param n_chains: int, optional + Number of independent chains. + :param threshold: float, optional + The threshold (bandwidth) for posterior (give as log if log + discrepancy). + :param initials: array of shape (n_chains, n_params), optional + Initial values for the sampled parameters for each chain. + Defaults to best evidence points. + :param algorithm: string, optional + Sampling algorithm to use. Currently 'nuts' (default) is + supported only. + :param n_evidence: int + If the regression model is not fitted yet, specify the + amount of evidence. + :returns: + BolfiSample + """ + # ToDo: change from ELFI-NUTS to Pyro-NUTS. + # The problem: Pyro-NUTS does not support unnormalized + # distributions. + + if self.state['n_batches'] == 0: + self.fit(n_evidence) + + # TODO: add more MCMC algorithms + if algorithm not in ['nuts', 'metropolis']: + raise ValueError("Unknown posterior sampler.") + if algorithm == 'metropolis': + raise NotImplementedError( + "Metropolis sampler not implemented. Use 'nuts' insteaad." + ) + + posterior = self.extract_posterior(threshold) + warmup = warmup or n_samples // 2 + + # Unless given, select the evidence points with smallest + # discrepancy + if initials is not None: + if np.asarray(initials).shape != ( + n_chains, self.target_model.input_dim + ): + raise ValueError( + "The shape of initials must be (n_chains, n_params).") + else: + inds = torch.argsort(self.target_model.Y) + initials = self.target_model.X[inds].detach().numpy() + + # enables caching for default RBF kernel + self.target_model.is_sampling = True + + tasks_ids = [] + ii_initial = 0 + """ + if algorithm == 'metropolis': + # ToDo: provide initial sigma proposals. 
+            sigma_proposals = resolve_sigmas(
+                self.target_model.parameter_names,
+                sigma_proposals,
+                self.target_model.bounds
+            )
+        """
+
+        # Sampling is embarrassingly parallel; depending on
+        # self.client this may run in parallel.
+        for ii in range(n_chains):
+            seed = get_sub_seed(self.seed, ii)
+            # discard bad initialization points
+            while np.isinf(posterior.logpdf(initials[ii_initial])):
+                ii_initial += 1
+                if ii_initial == len(inds):
+                    raise ValueError(
+                        "BOLFI.sample: Cannot find enough acceptable "
+                        "initialization points!"
+                    )
+
+            if algorithm == 'nuts':
+                tasks_ids.append(
+                    self.client.apply(
+                        mcmc.nuts,
+                        n_samples,
+                        initials[ii_initial],
+                        posterior.logpdf,
+                        posterior.gradient_logpdf,
+                        n_adapt=warmup,
+                        seed=seed,
+                        **kwargs))
+            """
+            elif algorithm == 'metropolis':
+                tasks_ids.append(
+                    self.client.apply(
+                        mcmc.metropolis,
+                        n_samples,
+                        initials[ii_initial],
+                        posterior.logpdf,
+                        sigma_proposals,
+                        warmup,
+                        seed=seed,
+                        **kwargs))
+            """
+            ii_initial += 1
+
+        # Get results from completed tasks or run sampling
+        # (client-specific).
+        chains = []
+        for task_id in tasks_ids:
+            chains.append(self.client.get_result(task_id))
+
+        chains = np.asarray(chains)
+        print(
+            "{} chains of {} iterations acquired. Effective sample size and "
+            "Rhat for each parameter:".format(n_chains, n_samples)
+        )
+        for ii, node in enumerate(self.target_model.parameter_names):
+            print(node, mcmc.eff_sample_size(chains[:, :, ii]),
+                  mcmc.gelman_rubin_statistic(chains[:, :, ii]))
+        self.target_model.is_sampling = False
+
+        return BolfiSample(
+            method_name='BOLFI',
+            chains=chains,
+            parameter_names=self.target_model.parameter_names,
+            warmup=warmup,
+            threshold=float(posterior.threshold),
+            n_sim=self.state['n_evidence'],
+            seed=self.seed)
+
+    def sample_torch_prototype(
+        self,
+        n_samples,
+        warmup=None,
+        n_chains=4,
+        threshold=None,
+        initials=None,
+        algorithm='nuts',
+        n_evidence=None,
+        **kwargs
+    ):
+        r"""
+        Sample the posterior distribution of BOLFI.
+
+        Here the likelihood is defined through the cumulative
+        distribution function of the standard normal distribution:
+        L(\theta) \propto F((h - \mu(\theta)) / \sigma(\theta)),
+        where h is the threshold, and \mu(\theta) and \sigma(\theta) are
+        the posterior mean and (noisy) standard deviation of the
+        associated Gaussian process. The sampling is performed with an
+        MCMC sampler (the No-U-Turn Sampler, NUTS).
+
+        :param n_samples: int
+            Number of requested samples from the posterior for each
+            chain. This includes warmup; note that the effective
+            sample size is usually considerably smaller.
+        :param warmup: int, optional
+            Length of the warmup sequence in MCMC sampling. Defaults to
+            ``n_samples // 2``.
+        :param n_chains: int, optional
+            Number of independent chains.
+        :param threshold: float, optional
+            The threshold (bandwidth) for the posterior (given in log
+            scale if a log discrepancy is used).
+        :param initials: array of shape (n_chains, n_params), optional
+            Initial values of the sampled parameters for each chain.
+            Defaults to the best evidence points.
+        :param algorithm: string, optional
+            Sampling algorithm to use. Currently only 'nuts' (default)
+            is supported.
+        :param n_evidence: int
+            If the regression model is not fitted yet, specify the
+            amount of evidence.
+        :returns:
+            BolfiSample
+        """
+
+        if self.state['n_batches'] == 0:
+            self.fit(n_evidence)
+
+        if algorithm not in ['nuts']:
+            raise ValueError("Unknown posterior sampler.")
+
+        posterior = self.extract_posterior(threshold)
+        if threshold is None:
+            # Use the threshold of the extracted posterior so that the
+            # CDF transform below is well defined.
+            threshold = float(posterior.threshold)
+        warmup = warmup or n_samples // 2
+
+        # Unless given, select the evidence points with the smallest
+        # discrepancy.
+        if initials is not None:
+            if torch.asarray(initials).shape != (
+                n_chains, self.target_model.input_dim
+            ):
+                raise ValueError(
+                    "The shape of initials must be (n_chains, n_params)."
+                )
+        else:
+            inds = torch.argsort(self.target_model.Y)
+            initials = torch.asarray(self.target_model.X[inds])
+
+        self.target_model.is_sampling = True
+
+        print(
+            "ToDo: check whether the CDF transform of the prior "
+            "distribution equals the distribution given by the prior "
+            "multiplied with the CDF."
+        )
+
+        self.target_model._gp.eval()
+        self.target_model._gp.likelihood.eval()
+
+        def pyro_model(*unused_args):
+            # The initial point passed via model_args is currently
+            # unused; see the ToDo notes below.
+            # Settings (matrices have "small" shape
+            # sample_size x sample_size):
+            #     covar_root_decomposition False,
+            #     log_prob False,
+            #     fast_solves False
+            x = self.pyro_torch_prior
+            with gpytorch.settings.fast_computations(False, False, False):
+                gp_eval = self.target_model._gp.likelihood(
+                    self.target_model._gp(x)
+                )
+            # The transform from GP mean and variance to the "GP CDF".
+            t = pyro.distributions.transforms.CumulativeDistributionTransform(
+                pyro.distributions.Normal(
+                    threshold - gp_eval.mean, torch.sqrt(gp_eval.variance)
+                )
+            )
+            # A uniform distribution over the bounds could be used here
+            # for likelihood (rather than posterior) sampling.
+            # lower_bounds = torch.tensor([
+            #     interval[0] for interval in self.bounds.values()
+            # ])
+            # upper_bounds = torch.tensor([
+            #     interval[1] for interval in self.bounds.values()
+            # ])
+            # u = torch.distributions.Uniform(
+            #     lower_bounds, upper_bounds
+            # )
+            return pyro.sample(
+                "posterior",
+                pyro.distributions.TransformedDistribution(
+                    posterior.prior, [t]
+                )
+            )
+
+        # seed = get_sub_seed(self.seed, ii)
+        # discard bad initialization points
+        # while torch.isinf(torch.asarray(
+        #     posterior.logpdf(initials[ii_initial])
+        # )):
+        #     ii_initial += 1
+        #     if ii_initial == len(inds):
+        #         raise ValueError(
+        #             "BOLFI.sample: Cannot find enough acceptable "
+        #             "initialization points!"
+        #         )
+
+        if algorithm == 'nuts':
+            print("ToDo: pass initial parameters to Pyro-NUTS.")
+            print("ToDo: pass seed to Pyro-NUTS.")
+            print("ToDo: switch from Pyro to NumPyro for performance.")
+            init_params, potential_fn, transforms, _ = (
+                pyro.infer.mcmc.util.initialize_model(
+                    pyro_model,
+                    model_args=(initials[0],),
+                    num_chains=n_chains,
+                )
+            )
+            nuts_kernel = NUTS(potential_fn=potential_fn)
+            mcmc_run = MCMC(
+                nuts_kernel,
+                num_samples=n_samples,
+                warmup_steps=warmup,
+                num_chains=n_chains,
+                initial_params=init_params,
+                transforms=transforms,
+                disable_progbar=False,
+            )
+            mcmc_run.run()
+            # get_samples() returns a dict keyed by sample site; take the
+            # 'posterior' site grouped by chain to obtain a tensor of
+            # shape (n_chains, n_samples, n_params).
+            chains = mcmc_run.get_samples(group_by_chain=True)['posterior']
+            print(chains)
+
+        chains = torch.asarray(chains)
+        print(
+            "{} chains of {} iterations acquired. Effective sample size and "
+            "Rhat for each parameter:".format(n_chains, n_samples)
+        )
+        # for ii, node in enumerate(self.target_model.parameter_names):
+        #     print(
+        #         node,
+        #         eff_sample_size(chains[:, :, ii]),
+        #         gelman_rubin_statistic(chains[:, :, ii])
+        #     )
+        mcmc_run.summary(prob=0.95)
+        self.target_model.is_sampling = False
+
+        self.target_model._gp.train()
+        self.target_model._gp.likelihood.train()
+
+        return BolfiSample(
+            method_name='BOLFI',
+            chains=chains,
+            parameter_names=self.target_model.parameter_names,
+            warmup=warmup,
+            threshold=float(posterior.threshold),
+            n_sim=self.state['n_evidence'],
+            seed=self.seed,
+        )
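The optimization-result extraction and extract_posterior above both locate the minimizer of the GP posterior mean by reusing BoTorch's UpperConfidenceBound with beta=0 and maximize=False. The following standalone sketch demonstrates that trick on a toy one-dimensional SingleTaskGP; it assumes a recent BoTorch/GPyTorch installation, and the training data are made up purely for illustration.

# Sketch: minimize a GP posterior mean via UpperConfidenceBound(beta=0).
import torch
from botorch.acquisition import UpperConfidenceBound
from botorch.fit import fit_gpytorch_mll
from botorch.generation.gen import gen_candidates_scipy, get_best_candidates
from botorch.models import SingleTaskGP
from botorch.optim.initializers import gen_batch_initial_conditions
from gpytorch.mlls import ExactMarginalLogLikelihood

# Toy "discrepancy" observations with a minimum near x = 0.6.
train_x = torch.linspace(0.0, 1.0, 8, dtype=torch.double).unsqueeze(-1)
train_y = (train_x - 0.6) ** 2
gp = SingleTaskGP(train_x, train_y)
fit_gpytorch_mll(ExactMarginalLogLikelihood(gp.likelihood, gp))

bounds = torch.tensor([[0.0], [1.0]], dtype=torch.double)
# With beta=0 and maximize=False the acquisition value is -mean(x), so
# maximizing it yields the minimizer of the posterior mean.
neg_mean = UpperConfidenceBound(gp, beta=0, maximize=False)
x_init = gen_batch_initial_conditions(
    neg_mean, bounds, q=1, num_restarts=10, raw_samples=256
)
candidates, acq_values = gen_candidates_scipy(
    initial_conditions=x_init,
    acquisition_function=neg_mean,
    lower_bounds=bounds[0],
    upper_bounds=bounds[1],
)
x_min = get_best_candidates(candidates, acq_values)
print("posterior-mean minimizer:", x_min.detach().numpy())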
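The sample docstrings above define the likelihood approximation L(theta) proportional to F((h - mu(theta)) / sigma(theta)). Once the surrogate's predictive mean and variance are available on a parameter grid, this quantity is cheap to evaluate directly. In the sketch below, gp_mean and gp_var are made-up placeholder arrays standing in for a GP prediction, and the flat prior is likewise only illustrative.

# Sketch: evaluate the BOLFI likelihood approximation on a 1-D grid.
import numpy as np
import scipy.stats as ss

theta = np.linspace(0.0, 1.0, 200)
gp_mean = (theta - 0.6) ** 2              # placeholder GP predictive mean
gp_var = np.full_like(theta, 1e-4)        # placeholder predictive variance

h = gp_mean.min()                         # threshold: minimum of the GP mean
likelihood = ss.norm.cdf((h - gp_mean) / np.sqrt(gp_var))

# Multiply by a prior density and normalize with the trapezoidal rule to
# obtain an approximate posterior on the grid.
prior_pdf = np.ones_like(theta)           # flat prior, illustration only
posterior = likelihood * prior_pdf
posterior /= np.trapz(posterior, theta)
print("approximate posterior mode:", theta[np.argmax(posterior)])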
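sample reports ELFI's effective sample size and Gelman-Rubin statistic for each parameter. As a reference for what the latter measures, here is a minimal NumPy version of the (non-split) Gelman-Rubin R-hat for a single parameter; in practice ELFI's own mcmc.gelman_rubin_statistic should be preferred.

# Sketch: Gelman-Rubin potential scale reduction factor (R-hat) for one
# parameter, with chains arranged as an array of shape (n_chains, n_samples).
import numpy as np


def gelman_rubin(chains):
    m, n = chains.shape
    chain_means = chains.mean(axis=1)
    within = chains.var(axis=1, ddof=1).mean()     # W: within-chain variance
    between = n * chain_means.var(ddof=1)          # B: between-chain variance
    var_hat = (n - 1) / n * within + between / n   # pooled variance estimate
    return np.sqrt(var_hat / within)


rng = np.random.default_rng(0)
well_mixed = rng.normal(size=(4, 1000))
print(gelman_rubin(well_mixed))   # close to 1.0 for well-mixed chains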
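sample_torch_prototype builds its Pyro model by pushing the prior through a CumulativeDistributionTransform of a normal distribution derived from the GP output, then hands the resulting potential function to NUTS. The sketch below exercises the same Pyro plumbing (TransformedDistribution, initialize_model, NUTS(potential_fn=...), MCMC) with a fixed toy base distribution in place of the GP; it assumes a Pyro version that provides CumulativeDistributionTransform, the distribution parameters are arbitrary, and a single chain is used so the example runs without a multiprocessing guard.

# Sketch: the Pyro NUTS invocation pattern used in sample_torch_prototype,
# with a toy base distribution instead of the GP-derived one.
import torch
import pyro
import pyro.distributions as dist
from pyro.distributions.transforms import CumulativeDistributionTransform
from pyro.infer import MCMC, NUTS
from pyro.infer.mcmc.util import initialize_model


def toy_model():
    base = dist.Normal(torch.tensor(0.0), torch.tensor(1.0))
    # Map the base sample through the CDF of another normal, mimicking the
    # "GP CDF" transform above; the transformed site lives on (0, 1).
    cdf = CumulativeDistributionTransform(
        dist.Normal(torch.tensor(0.1), torch.tensor(0.5))
    )
    return pyro.sample(
        "posterior", dist.TransformedDistribution(base, [cdf])
    )


init_params, potential_fn, transforms, _ = initialize_model(
    toy_model, num_chains=1
)
kernel = NUTS(potential_fn=potential_fn)
mcmc_run = MCMC(
    kernel,
    num_samples=200,
    warmup_steps=100,
    num_chains=1,
    initial_params=init_params,
    transforms=transforms,
    disable_progbar=True,
)
mcmc_run.run()
samples = mcmc_run.get_samples(group_by_chain=True)["posterior"]
print(samples.shape)   # torch.Size([1, 200])
mcmc_run.summary(prob=0.95)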