Skip to content

SMC Sampling fails with multivariate distribution #312

@mathDR

Description

@mathDR

Summary:

When a sequence of thresholds having length > 1 is passed to smc, the inference fails with ValueError: In executing node '_theta_logpdf': Vector 'x' must have either the same number of entries as, or one entry fewer than, parameter vector 'a', but alpha.shape = (10,) and x.shape = (100000,)..

Description:

Using a simulator of a multinomial model, having a dirichlet prior, attempting to run smc for a sequence of thresholds produces an error.

Reproducible Steps:

import numpy as np
import scipy.stats as ss
import elfi
num_categories = 10
num_experiments = 100
theta = elfi.Prior('dirichlet', np.ones(num_categories), name='theta')
def simulator(num_experiments, theta, batch_size=1, random_state=None):
    if len(theta.shape)==1:
        theta = theta.reshape(1,-1)
    return_value = []

    for i in range(theta.shape[0]):
        return_value.append(
            ss.multinomial.rvs(
                100, 
                theta[i,:], 
                random_state=random_state
            )
        )
    return np.array(return_value)
                       
def mean(y):
    return np.mean(y, axis=1)

def ret_identity(y):
    return y

def distance_in_variation(p1,observed=[]):
    arr_p1 = np.asarray(p1)
    y = np.tile(observed[0],(np.asarray(p1).shape[0],1))
    
    return 0.5*np.sum(np.abs(arr_p1-y),axis=1)

theta0 = np.random.dirichlet(np.ones(num_categories))
np.random.seed(20191126) 
y0 = simulator(num_experiments,theta0)

from functools import partial
# Add the simulator node and observed data to the model
sim_fn = partial(simulator,num_experiments)
sim = elfi.Simulator(sim_fn, theta, observed=y0)
d = elfi.Discrepancy(
    distance_in_variation, 
    elfi.Summary(ret_identity, sim, name='ret_identity'), 
    name='d')

smc = elfi.SMC(d, batch_size=10000)

N = 1000
schedule = [100,99]
%time result_smc = smc.sample(N, schedule)

Current Output:

When the threshold schedule is [100], everything runs. When it is changed to [100,99] it fails with the error:

Progress: |█████████████████████████-------------------------| 50.0% Complete
---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
~/.pyenv/versions/3.7.4/envs/work/lib/python3.7/site-packages/elfi/executor.py in execute(cls, G)
     69                 try:
---> 70                     G.node[node] = cls._run(op, node, G)
     71                 except Exception as exc:

~/.pyenv/versions/3.7.4/envs/work/lib/python3.7/site-packages/elfi/executor.py in _run(fn, node, G)
    153 
--> 154         output_dict = {'output': fn(*args, **kwargs)}
    155         return output_dict

~/.pyenv/versions/3.7.4/envs/work/lib/python3.7/site-packages/scipy/stats/_multivariate.py in logpdf(self, x, alpha)
   1448         alpha = _dirichlet_check_parameters(alpha)
-> 1449         x = _dirichlet_check_input(alpha, x)
   1450 

~/.pyenv/versions/3.7.4/envs/work/lib/python3.7/site-packages/scipy/stats/_multivariate.py in _dirichlet_check_input(alpha, x)
   1242                          "parameter vector 'a', but alpha.shape = %s "
-> 1243                          "and x.shape = %s." % (alpha.shape, x.shape))
   1244 

ValueError: Vector 'x' must have either the same number of entries as, or one entry fewer than, parameter vector 'a', but alpha.shape = (10,) and x.shape = (100000,).

During handling of the above exception, another exception occurred:

ValueError                                Traceback (most recent call last)
<timed exec> in <module>

~/.pyenv/versions/3.7.4/envs/work/lib/python3.7/site-packages/elfi/methods/parameter_inference.py in sample(self, n_samples, *args, **kwargs)
    404         bar = kwargs.pop('bar', True)
    405 
--> 406         return self.infer(n_samples, *args, bar=bar, **kwargs)
    407 
    408     def _extract_result_kwargs(self):

~/.pyenv/versions/3.7.4/envs/work/lib/python3.7/site-packages/elfi/methods/parameter_inference.py in infer(self, vis, bar, *args, **kwargs)
    262 
    263         while not self.finished:
--> 264             self.iterate()
    265             if vis:
    266                 self.plot_state(interactive=True, **vis_opt)

~/.pyenv/versions/3.7.4/envs/work/lib/python3.7/site-packages/elfi/methods/parameter_inference.py in iterate(self)
    299         # Submit new batches if allowed
    300         while self._allow_submit(self.batches.next_index):
--> 301             next_batch = self.prepare_new_batch(self.batches.next_index)
    302             logger.debug("Submitting batch %d" % self.batches.next_index)
    303             self.batches.submit(next_batch)

~/.pyenv/versions/3.7.4/envs/work/lib/python3.7/site-packages/elfi/methods/parameter_inference.py in prepare_new_batch(self, batch_index)
    716         params = GMDistribution.rvs(*self._gm_params, size=self.batch_size,
    717                                     prior_logpdf=self._prior.logpdf,
--> 718                                     random_state=self._round_random_state)
    719 
    720         batch = arr2d_to_batch(params, self.parameter_names)

~/.pyenv/versions/3.7.4/envs/work/lib/python3.7/site-packages/elfi/methods/utils.py in rvs(cls, means, cov, weights, size, prior_logpdf, random_state)
    229             # check validity of x
    230             if prior_logpdf is not None:
--> 231                 x = x[np.isfinite(prior_logpdf(x))]
    232 
    233             n_accepted1 = len(x)

~/.pyenv/versions/3.7.4/envs/work/lib/python3.7/site-packages/elfi/methods/utils.py in logpdf(self, x)
    356     def logpdf(self, x):
    357         """Return the log density of the joint prior at x."""
--> 358         return self._evaluate_pdf(x, log=True)
    359 
    360     def _evaluate_pdf(self, x, log=False):

~/.pyenv/versions/3.7.4/envs/work/lib/python3.7/site-packages/elfi/methods/utils.py in _evaluate_pdf(self, x, log)
    380             loaded_net.node[k] = {'output': v}
    381 
--> 382         val = self.client.compute(loaded_net)[node]
    383         if ndim == 0 or (ndim == 1 and self.dim > 1):
    384             val = val[0]

~/.pyenv/versions/3.7.4/envs/work/lib/python3.7/site-packages/elfi/client.py in compute(self, loaded_net)
    271     def compute(self, loaded_net):
    272         """Request evaluation of `loaded_net` and wait for result."""
--> 273         return self.apply_sync(Executor.execute, loaded_net)
    274 
    275     @property

~/.pyenv/versions/3.7.4/envs/work/lib/python3.7/site-packages/elfi/clients/native.py in apply_sync(self, kallable, *args, **kwargs)
     51 
     52         """
---> 53         return kallable(*args, **kwargs)
     54 
     55     def get_result(self, task_id):

~/.pyenv/versions/3.7.4/envs/work/lib/python3.7/site-packages/elfi/executor.py in execute(cls, G)
     71                 except Exception as exc:
     72                     raise exc.__class__("In executing node '{}': {}."
---> 73                                         .format(node, exc)).with_traceback(exc.__traceback__)
     74 
     75             elif 'output' not in attr:

~/.pyenv/versions/3.7.4/envs/work/lib/python3.7/site-packages/elfi/executor.py in execute(cls, G)
     68                 op = attr['operation']
     69                 try:
---> 70                     G.node[node] = cls._run(op, node, G)
     71                 except Exception as exc:
     72                     raise exc.__class__("In executing node '{}': {}."

~/.pyenv/versions/3.7.4/envs/work/lib/python3.7/site-packages/elfi/executor.py in _run(fn, node, G)
    152         args = [a[1] for a in sorted(args, key=itemgetter(0))]
    153 
--> 154         output_dict = {'output': fn(*args, **kwargs)}
    155         return output_dict
    156 

~/.pyenv/versions/3.7.4/envs/work/lib/python3.7/site-packages/scipy/stats/_multivariate.py in logpdf(self, x, alpha)
   1447         """
   1448         alpha = _dirichlet_check_parameters(alpha)
-> 1449         x = _dirichlet_check_input(alpha, x)
   1450 
   1451         out = self._logpdf(x, alpha)

~/.pyenv/versions/3.7.4/envs/work/lib/python3.7/site-packages/scipy/stats/_multivariate.py in _dirichlet_check_input(alpha, x)
   1241                          "of entries as, or one entry fewer than, "
   1242                          "parameter vector 'a', but alpha.shape = %s "
-> 1243                          "and x.shape = %s." % (alpha.shape, x.shape))
   1244 
   1245     if x.shape[0] != alpha.shape[0]:

ValueError: In executing node '_theta_logpdf': Vector 'x' must have either the same number of entries as, or one entry fewer than, parameter vector 'a', but alpha.shape = (10,) and x.shape = (100000,)..

Expected Output:

I expect the output to have samples similar to running with threshold [99].

ELFI Version:

0.7.4

Python Version:

3.7.4

Operating System:

MAC OSX

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions