A. Caterini
,
A. Doucet
,
D. Sejdinovic
,
Hamiltonian Variational Auto-Encoder, in Advances in Neural Information Processing Systems (NeurIPS), 2018, to appear.
Variational Auto-Encoders (VAEs) have become very popular techniques to perform inference and learning in latent variable models as they allow us to leverage the rich representational power of neural networks to obtain flexible approximations of the posterior of latent variables as well as tight evidence lower bounds (ELBOs). Combined with stochastic variational inference, this provides a methodology scaling to large datasets. However, for this methodology to be practically efficient, it is necessary to obtain low-variance unbiased estimators of the ELBO and its gradients with respect to the parameters of interest. While the use of Markov chain Monte Carlo (MCMC) techniques such as Hamiltonian Monte Carlo (HMC) has been previously suggested to achieve this, the proposed methods require specifying reverse kernels which have a large impact on performance. Additionally, the resulting unbiased estimator of the ELBO for most MCMC kernels is typically not amenable to the reparameterization trick. We show here how to optimally select reverse kernels in this setting and, by building upon Hamiltonian Importance Sampling (HIS), we obtain a scheme that provides low-variance unbiased estimators of the ELBO and its gradients using the reparameterization trick. This allows us to develop a Hamiltonian Variational Auto-Encoder (HVAE). This method can be reinterpreted as a target-informed normalizing flow which, within our context, only requires a few evaluations of the gradient of the sampled likelihood and trivial Jacobian calculations at each iteration.
@inproceedings{CatDouSej2018,
  author    = {Caterini, A. L. and Doucet, A. and Sejdinovic, D.},
  title     = {{Hamiltonian} Variational Auto-Encoder},
  booktitle = {Advances in Neural Information Processing Systems (NeurIPS)},
  year      = {2018},
  note      = {To appear}
}
G. Deligiannidis
,
A. Doucet
,
M. K. Pitt
,
The Correlated Pseudo-Marginal Method, Journal of the Royal Statistical Society: Series B (Statistical Methodology), vol. 80, no. 5, 839–870, 2018.
@article{deligiannidis2015correlated,
  author  = {Deligiannidis, George and Doucet, Arnaud and Pitt, Michael K},
  title   = {The Correlated Pseudo-Marginal Method},
  journal = {Journal of the Royal Statistical Society: Series B (Statistical Methodology)},
  volume  = {80},
  number  = {5},
  pages   = {839--870},
  year    = {2018}
}
@article{schmon2018large,
  author  = {Schmon, Sebastian M and Deligiannidis, George and Doucet, Arnaud and Pitt, Michael K},
  title   = {Large Sample Asymptotics of the Pseudo-Marginal Method},
  journal = {arXiv preprint arXiv:1806.10060},
  year    = {2018}
}
L. Middleton
,
G. Deligiannidis
,
A. Doucet
,
P. E. Jacob
,
Unbiased Markov chain Monte Carlo for intractable target distributions, arXiv preprint arXiv:1807.08691, 2018.
@article{middleton2018unbiased,
  author  = {Middleton, Lawrence and Deligiannidis, George and Doucet, Arnaud and Jacob, Pierre E},
  title   = {Unbiased {Markov} chain {Monte Carlo} for intractable target distributions},
  journal = {arXiv preprint arXiv:1807.08691},
  year    = {2018}
}
G. Deligiannidis
,
D. Paulin
,
A. Doucet
,
Randomized Hamiltonian Monte Carlo as Scaling Limit of the Bouncy Particle Sampler and Dimension-Free Convergence Rates, arXiv preprint arXiv:1808.04299, 2018.
@article{deligiannidis2018randomized,
  author  = {Deligiannidis, George and Paulin, Daniel and Doucet, Arnaud},
  title   = {Randomized {Hamiltonian} {Monte Carlo} as Scaling Limit of the Bouncy Particle Sampler and Dimension-Free Convergence Rates},
  journal = {arXiv preprint arXiv:1808.04299},
  year    = {2018}
}
@article{SchmonDeligiannidisDoucet2018a,
  author        = {Schmon, Sebastian M and Deligiannidis, George and Doucet, Arnaud and Pitt, Michael K},
  title         = {Large Sample Asymptotics of the Pseudo-Marginal Algorithm},
  journal       = {arXiv preprint arXiv:1806.10060},
  year          = {2018},
  url           = {https://arxiv.org/abs/1806.10060},
  internal-note = {Same arXiv identifier as entry schmon2018large; likely a duplicate with a variant title -- confirm and merge}
}
2017
C. J. Maddison
,
D. Lawson
,
G. Tucker
,
N. Heess
,
M. Norouzi
,
A. Mnih
,
A. Doucet
,
Y. W. Teh
,
Filtering Variational Objectives, in Advances in Neural Information Processing Systems (NeurIPS), 2017.
The evidence lower bound (ELBO) appears in many algorithms for maximum likelihood estimation (MLE) with latent variables because it is a sharp lower bound of the marginal log-likelihood. For neural latent variable models, optimizing the ELBO jointly in the variational posterior and model parameters produces state-of-the-art results. Inspired by the success of the ELBO as a surrogate MLE objective, we consider the extension of the ELBO to a family of lower bounds defined by a Monte Carlo estimator of the marginal likelihood. We show that the tightness of such bounds is asymptotically related to the variance of the underlying estimator. We introduce a special case, the filtering variational objectives (FIVOs), which takes the same arguments as the ELBO and passes them through a particle filter to form a tighter bound. FIVOs can be optimized tractably with stochastic gradients, and are particularly suited to MLE in sequential latent variable models. In standard sequential generative modeling tasks we present uniform improvements over models trained with ELBO, including some whole nat-per-timestep improvements.
@inproceedings{MadLawTuc2017b,
  author     = {Maddison, C. J. and Lawson, D. and Tucker, G. and Heess, N. and Norouzi, M. and Mnih, A. and Doucet, A. and Teh, Y. W.},
  title      = {Filtering Variational Objectives},
  booktitle  = {Advances in Neural Information Processing Systems (NeurIPS)},
  year       = {2017},
  month      = dec,
  bdsk-url-1 = {https://arxiv.org/pdf/1705.09279v1.pdf}
}
A. Bouchard-Côté
,
A. Doucet
,
A. Roth
,
Particle Gibbs Split-Merge Sampling for Bayesian Inference in Mixture Models, Journal of Machine Learning Research, vol. 18, no. 28, 1–39, Apr. 2017.
@article{pg-sm,
  author  = {Bouchard-C{\^o}t{\'e}, Alexandre and Doucet, Arnaud and Roth, Andrew},
  title   = {Particle {Gibbs} Split-Merge Sampling for {Bayesian} Inference in Mixture Models},
  journal = {Journal of Machine Learning Research},
  volume  = {18},
  number  = {28},
  pages   = {1--39},
  month   = apr,
  year    = {2017}
}
A. Barbos
,
F. Caron
,
J. F. Giovannelli
,
A. Doucet
,
Clone MCMC: Parallel High-Dimensional Gaussian Gibbs Sampling, in Advances in Neural Information Processing Systems (NeurIPS), 2017.
@inproceedings{Barbos2017,
  author    = {Barbos, A. and Caron, F. and Giovannelli, J. F. and Doucet, A.},
  title     = {Clone {MCMC}: Parallel High-Dimensional {Gaussian} {Gibbs} Sampling},
  booktitle = {Advances in Neural Information Processing Systems (NeurIPS)},
  year      = {2017}
}
F. Caron
,
W. Neiswanger
,
F. Wood
,
A. Doucet
,
M. Davy
,
Generalized Pólya Urn for Time-Varying Pitman-Yor Processes, Journal of Machine Learning Research (JMLR), vol. 18, no. 27, 1–32, 2017.
@article{Caron2017a,
  author  = {Caron, F. and Neiswanger, W. and Wood, F. and Doucet, A. and Davy, M.},
  title   = {Generalized {P{\'o}lya} Urn for Time-Varying {Pitman-Yor} Processes},
  journal = {Journal of Machine Learning Research},
  volume  = {18},
  number  = {27},
  pages   = {1--32},
  year    = {2017}
}
G. Deligiannidis
,
A. Bouchard-Côté
,
A. Doucet
,
Exponential Ergodicity of the Bouncy Particle Sampler, Annals of Statistics, to appear (arXiv preprint arXiv:1705.04579), 2017.
@article{deligiannidis2017exponential,
  author  = {Deligiannidis, George and Bouchard-C{\^o}t{\'e}, Alexandre and Doucet, Arnaud},
  title   = {Exponential Ergodicity of the Bouncy Particle Sampler},
  journal = {Annals of Statistics},
  year    = {2017},
  note    = {To appear; preprint arXiv:1705.04579}
}
P. Vanetti
,
A. Bouchard-Côté
,
G. Deligiannidis
,
A. Doucet
,
Piecewise Deterministic Markov Chain Monte Carlo, arXiv preprint arXiv:1707.05296, 2017.
@article{vanetti2017piecewise,
  author  = {Vanetti, Paul and Bouchard-C{\^o}t{\'e}, Alexandre and Deligiannidis, George and Doucet, Arnaud},
  title   = {Piecewise Deterministic {Markov} Chain {Monte Carlo}},
  journal = {arXiv preprint arXiv:1707.05296},
  year    = {2017}
}
J. Heng
,
A. N. Bishop
,
G. Deligiannidis
,
A. Doucet
,
Controlled Sequential Monte Carlo, arXiv preprint arXiv:1708.08396, 2017.
@article{heng2017controlled,
  author  = {Heng, Jeremy and Bishop, Adrian N and Deligiannidis, George and Doucet, Arnaud},
  title   = {Controlled Sequential {Monte Carlo}},
  journal = {arXiv preprint arXiv:1708.08396},
  year    = {2017}
}
@article{doucet2017markov,
  author  = {Bardenet, R{\'e}mi and Doucet, Arnaud and Holmes, Chris C.},
  title   = {On {Markov} chain {Monte Carlo} Methods for Tall Data},
  journal = {Journal of Machine Learning Research},
  volume  = {18},
  number  = {47},
  pages   = {1--43},
  year    = {2017}
}
C. J. Maddison
,
D. Lawson
,
G. Tucker
,
N. Heess
,
M. Norouzi
,
A. Mnih
,
A. Doucet
,
Y. W. Teh
,
Particle Value Functions, in ICLR 2017 Workshop Proceedings, 2017.
The policy gradients of the expected return objective can react slowly to rare rewards. Yet, in some cases agents may wish to emphasize the low or high returns regardless of their probability. Borrowing from the economics and control literature, we review the risk-sensitive value function that arises from an exponential utility and illustrate its effects on an example. This risk-sensitive value function is not always applicable to reinforcement learning problems, so we introduce the particle value function defined by a particle filter over the distributions of an agent’s experience, which bounds the risk-sensitive one. We illustrate the benefit of the policy gradients of this objective in Cliffworld.
@inproceedings{MadLawTuc2017a,
  author     = {Maddison, C. J. and Lawson, D. and Tucker, G. and Heess, N. and Norouzi, M. and Mnih, A. and Doucet, A. and Teh, Y. W.},
  title      = {Particle Value Functions},
  booktitle  = {ICLR 2017 Workshop Proceedings},
  year       = {2017},
  note       = {ArXiv e-prints: 1703.05820},
  bdsk-url-1 = {https://arxiv.org/pdf/1703.05820.pdf}
}
2016
T. Rainforth
,
C. A. Naesseth
,
F. Lindsten
,
B. Paige
,
J.-W. van de Meent
,
A. Doucet
,
F. Wood
,
Interacting Particle Markov Chain Monte Carlo, in Proceedings of the 33rd International Conference on Machine Learning, 2016, vol. 48.
We introduce interacting particle Markov chain Monte Carlo (iPMCMC), a PMCMC method based on an interacting pool of standard and conditional sequential Monte Carlo samplers. Like related methods, iPMCMC is a Markov chain Monte Carlo sampler on an extended space. We present empirical results that show significant improvements in mixing rates relative to both non-interacting PMCMC samplers, and a single PMCMC sampler with an equivalent memory and computational budget. An additional advantage of the iPMCMC method is that it is suitable for distributed and multi-core architectures.
@inproceedings{rainforth2016ipmcmc,
  author    = {Rainforth, Tom and Naesseth, Christian A and Lindsten, Fredrik and Paige, Brooks and van de Meent, Jan-Willem and Doucet, Arnaud and Wood, Frank},
  title     = {Interacting Particle {Markov} Chain {Monte Carlo}},
  booktitle = {Proceedings of the 33rd International Conference on Machine Learning},
  series    = {JMLR: W\&CP},
  volume    = {48},
  year      = {2016}
}
2015
A. Doucet
,
M. Pitt
,
G. Deligiannidis
,
R. Kohn
,
Efficient implementation of Markov chain Monte Carlo when using an unbiased likelihood estimator, Biometrika, vol. 102, no. 2, 295–313, 2015.
@article{doucet2015efficient,
  author    = {Doucet, Arnaud and Pitt, M. K. and Deligiannidis, George and Kohn, Robert},
  title     = {Efficient implementation of {Markov} chain {Monte Carlo} when using an unbiased likelihood estimator},
  journal   = {Biometrika},
  volume    = {102},
  number    = {2},
  pages     = {295--313},
  year      = {2015},
  publisher = {Oxford University Press}
}
@article{bardenet2015markov,
  author  = {Bardenet, R{\'e}mi and Doucet, Arnaud and Holmes, Chris C.},
  title   = {On {Markov} chain {Monte Carlo} methods for tall data},
  journal = {arXiv preprint arXiv:1505.02827},
  year    = {2015}
}
@unpublished{bardenet2015markow,
  author = {Bardenet, R{\'e}mi and Doucet, A and Holmes, C},
  title  = {{Markov} chain {Monte Carlo} and tall data},
  note   = {Preprint},
  year   = {2015}
}
T. Lienart
,
Y. W. Teh
,
A. Doucet
,
Expectation Particle Belief Propagation, in Advances in Neural Information Processing Systems (NeurIPS), 2015.
We propose an original particle-based implementation of the Loopy Belief Propagation (LBP) algorithm for pairwise Markov Random Fields (MRF) on a continuous state space. The algorithm constructs adaptively efficient proposal distributions approximating the local beliefs at each node of the MRF. This is achieved by considering proposal distributions in the exponential family whose parameters are updated iteratively in an Expectation Propagation (EP) framework. The proposed particle scheme provides consistent estimation of the LBP marginals as the number of particles increases. We demonstrate that it provides more accurate results than the Particle Belief Propagation (PBP) algorithm of Ihler and McAllester (2009) at a fraction of the computational cost and is additionally more robust empirically. The computational complexity of our algorithm at each iteration is quadratic in the number of particles. We also propose an accelerated implementation with sub-quadratic computational complexity which still provides consistent estimates of the loopy BP marginal distributions and performs almost as well as the original procedure.
@inproceedings{LieTehDou2015a,
  author     = {Lienart, T. and Teh, Y. W. and Doucet, A.},
  title      = {Expectation Particle Belief Propagation},
  booktitle  = {Advances in Neural Information Processing Systems (NeurIPS)},
  year       = {2015},
  bdsk-url-1 = {http://papers.nips.cc/paper/5674-expectation-particle-belief-propagation},
  bdsk-url-2 = {http://papers.nips.cc/paper/5674-expectation-particle-belief-propagation.pdf},
  bdsk-url-3 = {http://papers.nips.cc/paper/5674-expectation-particle-belief-propagation-supplemental.zip}
}
2014
R. Bardenet
,
A. Doucet
,
C. C. Holmes
,
Towards scaling up Markov chain Monte Carlo: an adaptive subsampling approach, in Proceedings of the 31st International Conference on Machine Learning (ICML-14), 2014, 405–413.
@inproceedings{bardenet2014towards,
  author    = {Bardenet, R{\'e}mi and Doucet, Arnaud and Holmes, Chris C.},
  title     = {Towards scaling up {Markov} chain {Monte Carlo}: an adaptive subsampling approach},
  booktitle = {Proceedings of the 31st International Conference on Machine Learning (ICML-14)},
  pages     = {405--413},
  year      = {2014}
}
R. Bardenet
,
A. Doucet
,
C. C. Holmes
,
An adaptive subsampling approach for MCMC inference in large datasets, in Proceedings of The 31st International Conference on Machine Learning, 2014, 405–413.
@inproceedings{bardenet2014adaptive,
  author        = {Bardenet, R{\'e}mi and Doucet, Arnaud and Holmes, Chris C.},
  title         = {An adaptive subsampling approach for {MCMC} inference in large datasets},
  booktitle     = {Proceedings of The 31st International Conference on Machine Learning},
  pages         = {405--413},
  year          = {2014},
  internal-note = {Same authors, proceedings and page range as entry bardenet2014towards; possibly the same paper under a variant title -- confirm}
}
B. Paige
,
F. Wood
,
A. Doucet
,
Y. W. Teh
,
Asynchronous Anytime Sequential Monte Carlo, in Advances in Neural Information Processing Systems (NeurIPS), 2014.
We introduce a new sequential Monte Carlo algorithm we call the particle cascade. The particle cascade is an asynchronous, anytime alternative to traditional sequential Monte Carlo algorithms that is amenable to parallel and distributed implementations. It uses no barrier synchronizations which leads to improved particle throughput and memory efficiency. It is an anytime algorithm in the sense that it can be run forever to emit an unbounded number of particles while keeping within a fixed memory budget. We prove that the particle cascade provides an unbiased marginal likelihood estimator which can be straightforwardly plugged into existing pseudo-marginal methods.
@inproceedings{PaiWooDou2014a,
  author     = {Paige, B. and Wood, F. and Doucet, A. and Teh, Y. W.},
  title      = {Asynchronous Anytime Sequential {Monte Carlo}},
  booktitle  = {Advances in Neural Information Processing Systems (NeurIPS)},
  year       = {2014},
  bdsk-url-1 = {http://papers.nips.cc/paper/5450-asynchronous-anytime-sequential-monte-carlo},
  bdsk-url-2 = {http://papers.nips.cc/paper/5450-asynchronous-anytime-sequential-monte-carlo.pdf},
  bdsk-url-3 = {http://papers.nips.cc/paper/5450-asynchronous-anytime-sequential-monte-carlo-supplemental.zip}
}
2012
A. Lee
,
F. Caron
,
A. Doucet
,
C. C. Holmes
,
et al.
,
Bayesian sparsity-path-analysis of genetic association signal using generalized t priors, Statistical applications in genetics and molecular biology, vol. 11, no. 2, 1–29, 2012.
@article{lee2012bayesian,
  author    = {Lee, Anthony and Caron, Francois and Doucet, Arnaud and Holmes, Chris C. and others},
  title     = {{Bayesian} sparsity-path-analysis of genetic association signal using generalized {$t$} priors},
  journal   = {Statistical Applications in Genetics and Molecular Biology},
  volume    = {11},
  number    = {2},
  pages     = {1--29},
  year      = {2012},
  publisher = {Walter de Gruyter GmbH \& Co. KG}
}
2010
A. Lee
,
F. Caron
,
A. Doucet
,
C. C. Holmes
,
A hierarchical Bayesian framework for constructing sparsity-inducing priors, arXiv preprint arXiv:1009.1914, 2010.
@article{lee2010hierarchical,
  author  = {Lee, Anthony and Caron, Francois and Doucet, Arnaud and Holmes, Chris C.},
  title   = {A hierarchical {Bayesian} framework for constructing sparsity-inducing priors},
  journal = {arXiv preprint arXiv:1009.1914},
  year    = {2010}
}
A. Lee
,
C. Yau
,
M. B. Giles
,
A. Doucet
,
C. C. Holmes
,
On the utility of graphics cards to perform massively parallel simulation of advanced Monte Carlo methods, Journal of Computational and Graphical Statistics, vol. 19, no. 4, 769–789, 2010.
@article{lee2010utility,
  author    = {Lee, Anthony and Yau, Christopher and Giles, Michael B and Doucet, Arnaud and Holmes, Chris C},
  title     = {On the utility of graphics cards to perform massively parallel simulation of advanced {Monte Carlo} methods},
  journal   = {Journal of Computational and Graphical Statistics},
  volume    = {19},
  number    = {4},
  pages     = {769--789},
  year      = {2010},
  publisher = {ASA}
}
2009
F. Caron
,
A. Doucet
,
Bayesian Nonparametric Models on Decomposable Graphs, in Advances in Neural Information Processing Systems (NeurIPS), 2009.
@inproceedings{Caron2009,
  author    = {Caron, F. and Doucet, A.},
  title     = {{Bayesian} Nonparametric Models on Decomposable Graphs},
  booktitle = {Advances in Neural Information Processing Systems (NeurIPS)},
  year      = {2009},
  owner     = {caron},
  timestamp = {2016.10.24}
}
S. Anjum
,
A. Doucet
,
C. C. Holmes
,
A boosting approach to structure learning of graphs with and without prior knowledge, Bioinformatics, vol. 25, no. 22, 2929–2936, 2009.
@article{anjum2009boosting,
  author    = {Anjum, Shahzia and Doucet, Arnaud and Holmes, Chris C},
  title     = {A boosting approach to structure learning of graphs with and without prior knowledge},
  journal   = {Bioinformatics},
  volume    = {25},
  number    = {22},
  pages     = {2929--2936},
  year      = {2009},
  publisher = {Oxford Univ Press}
}
2008
F. Caron
,
M. Davy
,
A. Doucet
,
E. Duflos
,
P. Vanheeghe
,
Bayesian inference for linear dynamic models with Dirichlet process mixtures, IEEE Transactions on Signal Processing, vol. 56, no. 1, 71–84, 2008.
@article{Caron2008,
  author    = {Caron, F. and Davy, M. and Doucet, A. and Duflos, E. and Vanheeghe, P.},
  title     = {{Bayesian} inference for linear dynamic models with {Dirichlet} process mixtures},
  journal   = {IEEE Transactions on Signal Processing},
  volume    = {56},
  number    = {1},
  pages     = {71--84},
  year      = {2008},
  publisher = {IEEE},
  owner     = {caron},
  timestamp = {2016.10.24}
}
A. Jasra
,
A. Doucet
,
D. A. Stephens
,
C. C. Holmes
,
Interacting sequential Monte Carlo samplers for trans-dimensional simulation, Computational Statistics & Data Analysis, vol. 52, no. 4, 1765–1791, 2008.
@article{jasra2008interacting,
  author    = {Jasra, Ajay and Doucet, Arnaud and Stephens, David A and Holmes, Chris C},
  title     = {Interacting sequential {Monte Carlo} samplers for trans-dimensional simulation},
  journal   = {Computational Statistics \& Data Analysis},
  volume    = {52},
  number    = {4},
  pages     = {1765--1791},
  year      = {2008},
  publisher = {Elsevier}
}
2007
F. Caron
,
M. Davy
,
A. Doucet
,
Generalized Polya urn for time-varying Dirichlet process mixtures, in Uncertainty in Artificial Intelligence (UAI), 2007.
@inproceedings{Caron2007,
  author    = {Caron, F. and Davy, M. and Doucet, A.},
  title     = {Generalized {Polya} urn for time-varying {Dirichlet} process mixtures},
  booktitle = {Uncertainty in Artificial Intelligence (UAI)},
  year      = {2007},
  owner     = {caron},
  timestamp = {2016.10.24}
}