You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

253 lines
18 KiB

title = {{{PILCO}}: {{A}} Model-Based and Data-Efficient Approach to Policy Search},
shorttitle = {{{PILCO}}},
booktitle = {Proceedings of the 28th {{International Conference}} on Machine Learning ({{ICML}}-11)},
author = {Deisenroth, Marc and Rasmussen, Carl E.},
year = {2011},
pages = {465--472},
file = {/home/markus/Zotero/storage/YEDBHXGB/Deisenroth and Rasmussen - 2011 - PILCO A model-based and data-efficient approach t.pdf}
archivePrefix = {arXiv},
eprinttype = {arxiv},
eprint = {1301.2294},
title = {Expectation {{Propagation}} for Approximate {{Bayesian}} Inference},
abstract = {This paper presents a new deterministic approximation technique in Bayesian networks. This method, "Expectation Propagation", unifies two previous techniques: assumed-density filtering, an extension of the Kalman filter, and loopy belief propagation, an extension of belief propagation in Bayesian networks. All three algorithms try to recover an approximate distribution which is close in KL divergence to the true distribution. Loopy belief propagation, because it propagates exact belief states, is useful for a limited class of belief networks, such as those which are purely discrete. Expectation Propagation approximates the belief states by only retaining certain expectations, such as mean and variance, and iterates until these expectations are consistent throughout the network. This makes it applicable to hybrid networks with discrete and continuous nodes. Expectation Propagation also extends belief propagation in the opposite direction - it can propagate richer belief states that incorporate correlations between nodes. Experiments with Gaussian mixture models show Expectation Propagation to be convincingly better than methods with similar computational cost: Laplace's method, variational Bayes, and Monte Carlo. Expectation Propagation also provides an efficient algorithm for training Bayes point machine classifiers.},
booktitle = {Proceedings of the {{Seventeenth}} Conference on {{Uncertainty}} in Artificial Intelligence},
publisher = {{Morgan Kaufmann Publishers Inc.}},
author = {Minka, Thomas P.},
year = {2001},
keywords = {Computer Science - Artificial Intelligence,Computer Science - Learning},
pages = {362--369},
file = {/home/markus/Zotero/storage/KNSHS9MI/Minka - 2013 - Expectation Propagation for approximate Bayesian i.pdf;/home/markus/Zotero/storage/3UKPDVIC/1301.html}
title = {Decomposition of {{Uncertainty}} in {{Bayesian Deep Learning}} for {{Efficient}} and {{Risk}}-Sensitive {{Learning}}},
booktitle = {International {{Conference}} on {{Machine Learning}}},
author = {Depeweg, Stefan and {Hern{\'a}ndez-Lobato}, Jos{\'e} Miguel and {Doshi-Velez}, Finale and Udluft, Steffen},
year = {2018},
pages = {1192--1201},
file = {/home/markus/Zotero/storage/IQV8Z9UK/Depeweg et al. - Decomposition of Uncertainty in Bayesian Deep Lear.pdf;/home/markus/Zotero/storage/TK4ZWLEU/Depeweg et al. - Decomposition of Uncertainty in Bayesian Deep Lear.pdf;/home/markus/Zotero/storage/FR49N2TD/auD.html}
archivePrefix = {arXiv},
eprinttype = {arxiv},
eprint = {1605.07127},
primaryClass = {cs, stat},
title = {Learning and {{Policy Search}} in {{Stochastic Dynamical Systems}} with {{Bayesian Neural Networks}}},
abstract = {We present an algorithm for policy search in stochastic dynamical systems using model-based reinforcement learning. The system dynamics are described with Bayesian neural networks (BNNs) that include stochastic input variables. These input variables allow us to capture complex statistical patterns in the transition dynamics (e.g. multi-modality and heteroskedasticity), which are usually missed by alternative modeling approaches. After learning the dynamics, our BNNs are then fed into an algorithm that performs random roll-outs and uses stochastic optimization for policy learning. We train our BNNs by minimizing {$\alpha$}-divergences with {$\alpha$} = 0.5, which usually produces better results than other techniques such as variational Bayes. We illustrate the performance of our method by solving a challenging problem where model-based approaches usually fail and by obtaining promising results in real-world scenarios including the control of a gas turbine and an industrial benchmark.},
language = {en},
journal = {arXiv:1605.07127 [cs, stat]},
author = {Depeweg, Stefan and {Hern{\'a}ndez-Lobato}, Jos{\'e} Miguel and {Doshi-Velez}, Finale and Udluft, Steffen},
month = may,
year = {2016},
keywords = {Statistics - Machine Learning,Computer Science - Learning,Computer Science - Machine Learning},
file = {/home/markus/Zotero/storage/PDDB5NVV/Depeweg et al. - 2016 - Learning and Policy Search in Stochastic Dynamical.pdf;/home/markus/Zotero/storage/X5C6XVZT/Depeweg et al. - 2016 - Learning and Policy Search in Stochastic Dynamical.pdf;/home/markus/Zotero/storage/MWFZ45PK/1605.html}
archivePrefix = {arXiv},
eprinttype = {arxiv},
eprint = {1810.07158},
primaryClass = {cs, stat},
title = {Data {{Association}} with {{Gaussian Processes}}},
abstract = {The data association problem is concerned with separating data coming from different generating processes, for example when data come from different data sources, contain significant noise, or exhibit multimodality. We present a fully Bayesian approach to this problem. Our model is capable of simultaneously solving the data association problem and the induced supervised learning problems. Underpinning our approach is the use of Gaussian process priors to encode the structure of both the data and the data associations. We present an efficient learning scheme based on doubly stochastic variational inference and discuss how it can be applied to deep Gaussian process priors.},
journal = {arXiv:1810.07158 [cs, stat]},
author = {Kaiser, Markus and Otte, Clemens and Runkler, Thomas and Ek, Carl Henrik},
month = oct,
year = {2018},
keywords = {Computer Science - Machine Learning,Statistics - Machine Learning},
file = {/home/markus/Zotero/storage/EC5P3VNZ/Kaiser et al. - Data Association with Gaussian Processes.pdf;/home/markus/Zotero/storage/ZB65CUNK/Kaiser et al. - 2018 - Data Association with Gaussian Processes.pdf;/home/markus/Zotero/storage/S7VSJTCS/1810.html},
ids = {kaiser_data_nodate}
title = {Bayesian {{Alignments}} of {{Warped Multi}}-{{Output Gaussian Processes}}},
booktitle = {Advances in {{Neural Information Processing Systems}} 31},
publisher = {{Curran Associates, Inc.}},
author = {Kaiser, Markus and Otte, Clemens and Runkler, Thomas and Ek, Carl Henrik},
editor = {Bengio, S. and Wallach, H. and Larochelle, H. and Grauman, K. and {Cesa-Bianchi}, N. and Garnett, R.},
year = {2018},
keywords = {Computer Science - Learning,Computer Science - Machine Learning,Statistics - Machine Learning},
pages = {6995--7004},
file = {/home/markus/Zotero/storage/MJQDUDFP/Kaiser et al. - 2017 - Bayesian Alignments of Warped Multi-Output Gaussia.pdf;/home/markus/Zotero/storage/N8FXLQBZ/Kaiser et al. - 2018 - Bayesian Alignments of Warped Multi-Output Gaussia.pdf;/home/markus/Zotero/storage/P98SK2MP/Kaiser et al. - 2017 - Bayesian Alignments of Warped Multi-Output Gaussia.pdf;/home/markus/Zotero/storage/5KMTSPFI/7931-bayesian-alignments-of-warped-multi-output-gaussian-processes.html;/home/markus/Zotero/storage/AHX4Q83I/1710.html;/home/markus/Zotero/storage/UKGW6CEX/1710.html}
title = {Scalable Variational {{Gaussian}} Process Classification},
volume = {38},
abstract = {Gaussian process classification is a popular method with a number of appealing properties. We show how to scale the model within a variational inducing point framework, outperforming the state of the art on benchmark datasets. Importantly, the variational formulation can be exploited to allow classification in problems with millions of data points, as we demonstrate in experiments. Copyright 2015 by the authors.},
journal = {Journal of Machine Learning Research},
author = {Hensman, James and Matthews, Alexander G. de G. and Ghahramani, Zoubin},
year = {2015},
keywords = {Statistics - Machine Learning},
pages = {351--360},
file = {/home/markus/Zotero/storage/T4WFAQPK/Hensman et al. - 2014 - Scalable Variational Gaussian Process Classificati.pdf;/home/markus/Zotero/storage/5GEKF8R7/1411.html;/home/markus/Zotero/storage/FX4I5R8Q/display.html}
title = {Neural Fitted {{Q}} Iteration -- First Experiences with a Data Efficient Neural Reinforcement Learning Method},
booktitle = {European {{Conference}} on {{Machine Learning}}},
publisher = {{Springer}},
author = {Riedmiller, Martin},
year = {2005},
pages = {317--328},
file = {/home/markus/Zotero/storage/3JSSKLJS/Riedmiller - 2005 - Neural fitted Q iterationfirst experiences with a.pdf;/home/markus/Zotero/storage/SCF964Y7/11564096_32.html}
title = {Batch Reinforcement Learning},
booktitle = {Reinforcement Learning},
publisher = {{Springer}},
author = {Lange, Sascha and Gabel, Thomas and Riedmiller, Martin},
year = {2012},
pages = {45--73},
file = {/home/markus/Zotero/storage/9AQ568ZU/978-3-642-27645-3_2.html}
title = {The Wet Game of Chicken},
journal = {Siemens AG, CT IC 4, Technical Report},
author = {Tresp, Volker},
year = {1994}
address = {{Cambridge, Mass}},
series = {Adaptive Computation and Machine Learning},
title = {Reinforcement Learning: An Introduction},
isbn = {978-0-262-19398-6},
lccn = {Q325.6 .S88 1998},
shorttitle = {Reinforcement Learning},
language = {en},
publisher = {{MIT Press}},
author = {Sutton, Richard S. and Barto, Andrew G.},
year = {1998},
keywords = {Reinforcement learning},
file = {/home/markus/Zotero/storage/6EQF4BV3/Sutton and Barto - 1998 - Reinforcement learning an introduction.pdf}
title = {Overlapping Mixtures of {{Gaussian}} Processes for the Data Association Problem},
volume = {45},
number = {4},
journal = {Pattern Recognition},
author = {{L{\'a}zaro-Gredilla}, Miguel and Van Vaerenbergh, Steven and Lawrence, Neil D.},
year = {2012},
keywords = {Computer Science - Artificial Intelligence,Statistics - Machine Learning,Computer Science - Machine Learning},
pages = {1386--1395},
file = {/home/markus/Zotero/storage/9PWMT93W/Lázaro-Gredilla et al. - 2011 - Overlapping Mixtures of Gaussian Processes for the.pdf;/home/markus/Zotero/storage/GSDXF73X/Lázaro-Gredilla et al. - 2012 - Overlapping mixtures of Gaussian processes for the.pdf;/home/markus/Zotero/storage/HZYC3SEH/S0031320311004109.html;/home/markus/Zotero/storage/WXY7PLDG/1108.html}
title = {Deep {{Gaussian Processes}}},
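abstract = {In this paper we introduce deep Gaussian process (GP) models. Deep GPs are a deep belief network based on Gaussian process mappings. The data is modeled as the output of a multivariate GP. The inputs to that Gaussian process are then governed by another GP. A single layer model is equivalent to a standard GP or the GP latent variable model (GP-LVM). We perform inference in the model by approximate variational marginalization. This results in a strict lower bound on the marginal likelihood of the model which we use for model selection (number of layers and nodes per layer). Deep belief networks are typically applied to relatively large data sets using stochastic gradient descent for optimization. Our fully Bayesian treatment allows for the application of deep models even when data is scarce. Model selection by our variational bound shows that a five layer hierarchy is justified even when modelling a digit data set containing only 150 examples.},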
language = {en},
booktitle = {Artificial {{Intelligence}} and {{Statistics}}},
author = {Damianou, Andreas and Lawrence, Neil},
month = apr,
year = {2013},
keywords = {Statistics - Machine Learning,Computer Science - Learning,G.1.2,G.3,I.2.6,Mathematics - Probability,60G15; 58E30},
pages = {207--215},
file = {/home/markus/Zotero/storage/BUXWE2UV/Damianou and Lawrence - 2012 - Deep Gaussian Processes.pdf;/home/markus/Zotero/storage/WKSUAVEY/Damianou und Lawrence - 2013 - Deep Gaussian Processes.pdf;/home/markus/Zotero/storage/K6M9IPNY/damianou13a.html;/home/markus/Zotero/storage/S2KB72DK/1211.html}
title = {Mixture Density Networks},
author = {Bishop, Christopher M.},
year = {1994},
file = {/home/markus/Zotero/storage/HLZBIATM/Bishop - 1994 - Mixture density networks.pdf}
title = {Infinite {{Mixtures}} of {{Gaussian Process Experts}}},
booktitle = {Advances in {{Neural Information Processing Systems}} 14},
publisher = {{MIT Press}},
author = {Rasmussen, Carl E. and Ghahramani, Zoubin},
editor = {Dietterich, T. G. and Becker, S. and Ghahramani, Z.},
year = {2002},
pages = {881--888},
file = {/home/markus/Zotero/storage/VHKBGGWV/Rasmussen und Ghahramani - 2002 - Infinite Mixtures of Gaussian Process Experts.pdf;/home/markus/Zotero/storage/3YP3XGSE/2055-infinite-mixtures-of-gaussian-process-experts.html}
title = {{{TensorFlow}}: {{Large}}-{{Scale Machine Learning}} on {{Heterogeneous Systems}}},
author = {Abadi, Mart{\'i}n and Agarwal, Ashish and Barham, Paul and Brevdo, Eugene and Chen, Zhifeng and Citro, Craig and Corrado, Greg S. and Davis, Andy and Dean, Jeffrey and Devin, Matthieu and Ghemawat, Sanjay and Goodfellow, Ian and Harp, Andrew and Irving, Geoffrey and Isard, Michael and Jia, Yangqing and Jozefowicz, Rafal and Kaiser, Lukasz and Kudlur, Manjunath and Levenberg, Josh and Man{\'e}, Dandelion and Monga, Rajat and Moore, Sherry and Murray, Derek and Olah, Chris and Schuster, Mike and Shlens, Jonathon and Steiner, Benoit and Sutskever, Ilya and Talwar, Kunal and Tucker, Paul and Vanhoucke, Vincent and Vasudevan, Vijay and Vi{\'e}gas, Fernanda and Vinyals, Oriol and Warden, Pete and Wattenberg, Martin and Wicke, Martin and Yu, Yuan and Zheng, Xiaoqiang},
year = {2015},
file = {/home/markus/Zotero/storage/WLKK3HI7/Abadi et al. - 2015 - TensorFlow Large-Scale Machine Learning on Hetero.pdf},
note = {Software available from tensorflow.org}
archivePrefix = {arXiv},
eprinttype = {arxiv},
eprint = {1611.00712},
primaryClass = {cs, stat},
title = {The {{Concrete Distribution}}: {{A Continuous Relaxation}} of {{Discrete Random Variables}}},
shorttitle = {The {{Concrete Distribution}}},
abstract = {The reparameterization trick enables optimizing large scale stochastic computation graphs via gradient descent. The essence of the trick is to refactor each stochastic node into a differentiable function of its parameters and a random variable with fixed distribution. After refactoring, the gradients of the loss propagated by the chain rule through the graph are low variance unbiased estimators of the gradients of the expected loss. While many continuous random variables have such reparameterizations, discrete random variables lack useful reparameterizations due to the discontinuous nature of discrete states. In this work we introduce Concrete random variables---continuous relaxations of discrete random variables. The Concrete distribution is a new family of distributions with closed form densities and a simple reparameterization. Whenever a discrete stochastic node of a computation graph can be refactored into a one-hot bit representation that is treated continuously, Concrete stochastic nodes can be used with automatic differentiation to produce low-variance biased gradients of objectives (including objectives that depend on the log-probability of latent stochastic nodes) on the corresponding discrete graph. We demonstrate the effectiveness of Concrete relaxations on density estimation and structured prediction tasks using neural networks.},
journal = {arXiv:1611.00712 [cs, stat]},
author = {Maddison, Chris J. and Mnih, Andriy and Teh, Yee Whye},
month = nov,
year = {2016},
keywords = {Statistics - Machine Learning,Computer Science - Machine Learning},
file = {/home/markus/Zotero/storage/4JCGSNBV/Maddison et al. - 2016 - The Concrete Distribution A Continuous Relaxation.pdf;/home/markus/Zotero/storage/M4WZ8CC4/1611.html}
title = {Efficient Uncertainty Propagation for Reinforcement Learning with Limited Data},
booktitle = {International {{Conference}} on {{Artificial Neural Networks}}},
publisher = {{Springer}},
author = {Hans, Alexander and Udluft, Steffen},
year = {2009},
pages = {70--79},
file = {/home/markus/Zotero/storage/28GBNUPD/Hans und Udluft - 2009 - Efficient uncertainty propagation for reinforcemen.pdf;/home/markus/Zotero/storage/WGKEVCHC/978-3-642-04274-4_8.html}
archivePrefix = {arXiv},
eprinttype = {arxiv},
eprint = {1707.05534},
primaryClass = {cs, stat},
title = {Latent {{Gaussian Process Regression}}},
abstract = {We introduce Latent Gaussian Process Regression which is a latent variable extension allowing modelling of non-stationary processes using stationary GP priors. The approach is built on extending the input space of a regression problem with a latent variable that is used to modulate the covariance function over the input space. We show how our approach can be used to model non-stationary processes but also how multi-modal or non-functional processes can be described where the input signal cannot fully disambiguate the output. We exemplify the approach on a set of synthetic data and provide results on real data from geostatistics.},
journal = {arXiv:1707.05534 [cs, stat]},
author = {Bodin, Erik and Campbell, Neill D. F. and Ek, Carl Henrik},
month = jul,
year = {2017},
keywords = {Statistics - Machine Learning,Computer Science - Learning},
file = {/home/markus/Zotero/storage/IWU4IK4P/Bodin et al. - 2017 - Latent Gaussian Process Regression.pdf;/home/markus/Zotero/storage/AZSUGMDS/1707.html}
author = {Shalev-Shwartz, Shai and Ben-David, Shai},
title = {Understanding Machine Learning: From Theory to Algorithms},
year = {2014},
isbn = {1107057132, 9781107057135},
publisher = {Cambridge University Press},
address = {New York, NY, USA},
author = {Bar-Shalom, Y.},
title = {Tracking and Data Association},
year = {1987},
isbn = {0-120-79760-7},
publisher = {Academic Press Professional, Inc.},
address = {San Diego, CA, USA},
author = {Cox, Ingemar J.},
title = {A Review of Statistical Data Association Techniques for Motion Correspondence},
journal = {International Journal of Computer Vision},
year = {1993},
volume = {10},
pages = {53--66}