@article{damianou_deep_2012,
archivePrefix = {arXiv},
eprinttype = {arxiv},
eprint = {1211.0358},
primaryClass = {cs, math, stat},
title = {Deep {{Gaussian Processes}}},
url = {http://arxiv.org/abs/1211.0358},
abstract = {In this paper we introduce deep Gaussian process (GP) models. Deep GPs are a deep belief network based on Gaussian process mappings. The data is modeled as the output of a multivariate GP. The inputs to that Gaussian process are then governed by another GP. A single layer model is equivalent to a standard GP or the GP latent variable model (GP-LVM). We perform inference in the model by approximate variational marginalization. This results in a strict lower bound on the marginal likelihood of the model which we use for model selection (number of layers and nodes per layer). Deep belief networks are typically applied to relatively large data sets using stochastic gradient descent for optimization. Our fully Bayesian treatment allows for the application of deep models even when data is scarce. Model selection by our variational bound shows that a five layer hierarchy is justified even when modelling a digit data set containing only 150 examples.},
urldate = {2016-09-05},
date = {2012-11-01},
keywords = {Computer Science - Learning,G.3,I.2.6,Statistics - Machine Learning,60G15; 58E30,G.1.2,Mathematics - Probability},
author = {Damianou, Andreas C. and Lawrence, Neil D.},
file = {C:\\Users\\markus\\Zotero\\storage\\BUXWE2UV\\Damianou and Lawrence - 2012 - Deep Gaussian Processes.pdf;C:\\Users\\markus\\Zotero\\storage\\S2KB72DK\\1211.html}
}
@article{jacobs_adaptive_1991,
title = {Adaptive Mixtures of Local Experts},
volume = {3},
number = {1},
journaltitle = {Neural computation},
date = {1991},
pages = {79--87},
author = {Jacobs, Robert A. and Jordan, Michael I. and Nowlan, Steven J. and Hinton, Geoffrey E.},
file = {C:\\Users\\markus\\Zotero\\storage\\T4BQHV93\\Jacobs et al. - 1991 - Adaptive mixtures of local experts.pdf;C:\\Users\\markus\\Zotero\\storage\\APX3N2YA\\neco.1991.3.1.html;C:\\Users\\markus\\Zotero\\storage\\WZAY9YTW\\auD.html}
}
@article{hathaway_switching_1993,
title = {Switching Regression Models and Fuzzy Clustering},
volume = {1},
issn = {1063-6706},
doi = {10.1109/91.236552},
abstract = {A family of objective functions called fuzzy c-regression models, which can be used to fit switching regression models to certain types of mixed data, is presented. Minimization of particular objective functions in the family yields simultaneous estimates for the parameters of c regression models, together with a fuzzy c-partitioning of the data. A general optimization approach for the family of objective functions is given and corresponding theoretical convergence results are discussed. The approach is illustrated by two numerical examples that show how it can be used to fit mixed data to coupled linear and nonlinear models.},
number = {3},
journaltitle = {IEEE Transactions on Fuzzy Systems},
date = {1993-08},
pages = {195-204},
keywords = {Computer science,Clustering algorithms,convergence,convergence of numerical methods,Couplings,Covariance matrix,fuzzy c-regression models,fuzzy clustering,fuzzy set theory,Fuzzy sets,Linear approximation,Marine animals,minimisation,mixed data,objective functions,parameter estimation,statistical analysis,switching regression models,Yield estimation},
author = {Hathaway, R. J. and Bezdek, J. C.},
file = {C:\\Users\\markus\\Zotero\\storage\\ILUA2D27\\Hathaway und Bezdek - 1993 - Switching regression models and fuzzy clustering.pdf;C:\\Users\\markus\\Zotero\\storage\\KT65LIIU\\236552.html}
}
@article{kaiser_bayesian_2017,
archivePrefix = {arXiv},
eprinttype = {arxiv},
eprint = {1710.02766},
primaryClass = {cs, stat},
title = {Bayesian {{Alignments}} of {{Warped Multi-Output Gaussian Processes}}},
url = {http://arxiv.org/abs/1710.02766},
abstract = {We propose a novel Bayesian approach to modelling nonlinear alignments of time series based on latent shared information. We apply the method to the real-world problem of finding common structure in the sensor data of wind turbines introduced by the underlying latent and turbulent wind field. The proposed model allows for both arbitrary alignments of the inputs and non-parametric output warpings to transform the observations. This gives rise to multiple deep Gaussian process models connected via latent generating processes. We present an efficient variational approximation based on nested variational compression and show how the model can be used to extract shared information between dependent time series, recovering an interpretable functional decomposition of the learning problem. We show results for an artificial data set and real-world data of two wind turbines.},
urldate = {2018-06-08},
date = {2017-10-07},
keywords = {Computer Science - Learning,Statistics - Machine Learning},
author = {Kaiser, Markus and Otte, Clemens and Runkler, Thomas and Ek, Carl Henrik},
file = {C:\\Users\\markus\\Zotero\\storage\\MJQDUDFP\\Kaiser et al. - 2017 - Bayesian Alignments of Warped Multi-Output Gaussia.pdf;C:\\Users\\markus\\Zotero\\storage\\UKGW6CEX\\1710.html}
}
@article{matthews_gpflow_2017,
title = {{{GPflow}}: {{A Gaussian}} Process Library Using {{TensorFlow}}},
volume = {18},
url = {http://www.jmlr.org/papers/volume18/16-537/16-537.pdf},
shorttitle = {{{GPflow}}},
number = {40},
journaltitle = {Journal of Machine Learning Research},
urldate = {2017-09-27},
date = {2017},
pages = {1--6},
author = {Matthews, Alexander G. de G. and van der Wilk, Mark and Nickson, Tom and Fujii, Keisuke and Boukouvalas, Alexis and León-Villagrá, Pablo and Ghahramani, Zoubin and Hensman, James},
options = {useprefix=true},
file = {C:\\Users\\markus\\Zotero\\storage\\X6QGAFR8\\Matthews et al. - 2017 - GPflow A Gaussian process library using TensorFlo.pdf}
}
@article{bodin_latent_2017,
archivePrefix = {arXiv},
eprinttype = {arxiv},
eprint = {1707.05534},
primaryClass = {cs, stat},
title = {Latent {{Gaussian Process Regression}}},
url = {http://arxiv.org/abs/1707.05534},
abstract = {We introduce Latent Gaussian Process Regression which is a latent variable extension allowing modelling of non-stationary processes using stationary GP priors. The approach is built on extending the input space of a regression problem with a latent variable that is used to modulate the covariance function over the input space. We show how our approach can be used to model non-stationary processes but also how multi-modal or non-functional processes can be described where the input signal cannot fully disambiguate the output. We exemplify the approach on a set of synthetic data and provide results on real data from geostatistics.},
urldate = {2017-08-29},
date = {2017-07-18},
keywords = {Computer Science - Learning,Statistics - Machine Learning},
author = {Bodin, Erik and Campbell, Neill D. F. and Ek, Carl Henrik},
file = {C:\\Users\\markus\\Zotero\\storage\\IWU4IK4P\\Bodin et al. - 2017 - Latent Gaussian Process Regression.pdf;C:\\Users\\markus\\Zotero\\storage\\AZSUGMDS\\1707.html}
}
@article{salimbeni_doubly_2017,
archivePrefix = {arXiv},
eprinttype = {arxiv},
eprint = {1705.08933},
primaryClass = {stat},
title = {Doubly {{Stochastic Variational Inference}} for {{Deep Gaussian Processes}}},
url = {http://arxiv.org/abs/1705.08933},
abstract = {Gaussian processes (GPs) are a good choice for function approximation as they are flexible, robust to over-fitting, and provide well-calibrated predictive uncertainty. Deep Gaussian processes (DGPs) are multi-layer generalisations of GPs, but inference in these models has proved challenging. Existing approaches to inference in DGP models assume approximate posteriors that force independence between the layers, and do not work well in practice. We present a doubly stochastic variational inference algorithm, which does not force independence between layers. With our method of inference we demonstrate that a DGP model can be used effectively on data ranging in size from hundreds to a billion points. We provide strong empirical evidence that our inference scheme for DGPs works well in practice in both classification and regression.},
urldate = {2017-06-02},
date = {2017-05-24},
keywords = {Statistics - Machine Learning},
author = {Salimbeni, Hugh and Deisenroth, Marc},
file = {C:\\Users\\markus\\Zotero\\storage\\FTCRG5BC\\Salimbeni und Deisenroth - 2017 - Doubly Stochastic Variational Inference for Deep G.pdf;C:\\Users\\markus\\Zotero\\storage\\AP6UXDGD\\1705.html}
}
@inproceedings{titsias_variational_2009,
title = {Variational {{Learning}} of {{Inducing Variables}} in {{Sparse Gaussian Processes}}},
volume = {5},
url = {http://www.jmlr.org/proceedings/papers/v5/titsias09a/titsias09a.pdf},
booktitle = {{{AISTATS}}},
urldate = {2017-04-06},
date = {2009},
pages = {567--574},
author = {Titsias, Michalis K.},
file = {C:\\Users\\markus\\Zotero\\storage\\UTMCPPXS\\Titsias - 2009 - Variational Learning of Inducing Variables in Spar.pdf}
}
@article{hensman_gaussian_2013,
archivePrefix = {arXiv},
eprinttype = {arxiv},
eprint = {1309.6835},
primaryClass = {cs, stat},
title = {Gaussian {{Processes}} for {{Big Data}}},
url = {http://arxiv.org/abs/1309.6835},
abstract = {We introduce stochastic variational inference for Gaussian process models. This enables the application of Gaussian process (GP) models to data sets containing millions of data points. We show how GPs can be variationally decomposed to depend on a set of globally relevant inducing variables which factorize the model in the necessary manner to perform variational inference. Our approach is readily extended to models with non-Gaussian likelihoods and latent variable models based around Gaussian processes. We demonstrate the approach on a simple toy problem and two real world data sets.},
urldate = {2016-07-06},
date = {2013-09-26},
keywords = {Computer Science - Learning,Statistics - Machine Learning},
author = {Hensman, James and Fusi, Nicolo and Lawrence, Neil D.},
file = {C:\\Users\\markus\\Zotero\\storage\\EU8WZFR4\\Hensman et al. - 2013 - Gaussian Processes for Big Data.pdf;C:\\Users\\markus\\Zotero\\storage\\2JAR4BNM\\1309.html}
}
@article{hensman_scalable_2014,
archivePrefix = {arXiv},
eprinttype = {arxiv},
eprint = {1411.2005},
primaryClass = {stat},
title = {Scalable {{Variational Gaussian Process Classification}}},
url = {http://arxiv.org/abs/1411.2005},
abstract = {Gaussian process classification is a popular method with a number of appealing properties. We show how to scale the model within a variational inducing point framework, outperforming the state of the art on benchmark datasets. Importantly, the variational formulation can be exploited to allow classification in problems with millions of data points, as we demonstrate in experiments.},
urldate = {2017-02-13},
date = {2014-11-07},
keywords = {Statistics - Machine Learning},
author = {Hensman, James and Matthews, Alex and Ghahramani, Zoubin},
file = {C:\\Users\\markus\\Zotero\\storage\\T4WFAQPK\\Hensman et al. - 2014 - Scalable Variational Gaussian Process Classificati.pdf;C:\\Users\\markus\\Zotero\\storage\\5GEKF8R7\\1411.html}
}
@inproceedings{choi_robust_2016,
title = {Robust Learning from Demonstration Using Leveraged {{Gaussian}} Processes and Sparse-Constrained Optimization},
booktitle = {2016 {{IEEE International Conference}} on {{Robotics}} and {{Automation}} ({{ICRA}})},
publisher = {{IEEE}},
date = {2016},
pages = {470--475},
author = {Choi, Sungjoon and Lee, Kyungjae and Oh, Songhwai},
file = {C:\\Users\\markus\\Zotero\\storage\\LNLKLPLD\\Choi et al. - 2016 - Robust learning from demonstration using leveraged.pdf;C:\\Users\\markus\\Zotero\\storage\\AGU553L3\\7487168.html}
}
@incollection{rasmussen_infinite_2002,
title = {Infinite {{Mixtures}} of {{Gaussian Process Experts}}},
url = {http://papers.nips.cc/paper/2055-infinite-mixtures-of-gaussian-process-experts.pdf},
booktitle = {Advances in {{Neural Information Processing Systems}} 14},
publisher = {{MIT Press}},
urldate = {2018-08-23},
date = {2002},
pages = {881--888},
author = {Rasmussen, Carl E. and Ghahramani, Zoubin},
editor = {Dietterich, T. G. and Becker, S. and Ghahramani, Z.},
file = {C:\\Users\\markus\\Zotero\\storage\\VHKBGGWV\\Rasmussen und Ghahramani - 2002 - Infinite Mixtures of Gaussian Process Experts.pdf;C:\\Users\\markus\\Zotero\\storage\\3YP3XGSE\\2055-infinite-mixtures-of-gaussian-process-experts.html}
}
@article{choi_choicenet_2018,
archivePrefix = {arXiv},
eprinttype = {arxiv},
eprint = {1805.06431},
primaryClass = {cs, stat},
title = {{{ChoiceNet}}: {{Robust Learning}} by {{Revealing Output Correlations}}},
url = {http://arxiv.org/abs/1805.06431},
shorttitle = {{{ChoiceNet}}},
abstract = {In this paper, we focus on the supervised learning problem with corrupted training data. We assume that the training dataset is generated from a mixture of a target distribution and other unknown distributions. We estimate the quality of each data by revealing the correlation between the generated distribution and the target distribution. To this end, we present a novel framework referred to here as ChoiceNet that can robustly infer the target distribution in the presence of inconsistent data. We demonstrate that the proposed framework is applicable to both classification and regression tasks. ChoiceNet is evaluated in comprehensive experiments, where we show that it constantly outperforms existing baseline methods in the handling of noisy data. Particularly, ChoiceNet is successfully applied to autonomous driving tasks where it learns a safe driving policy from a dataset with mixed qualities. In the classification task, we apply the proposed method to the MNIST and CIFAR-10 datasets and it shows superior performances in terms of robustness to noisy labels.},
urldate = {2018-08-23},
date = {2018-05-16},
keywords = {Statistics - Machine Learning,Computer Science - Machine Learning},
author = {Choi, Sungjoon and Hong, Sanghoon and Lim, Sungbin},
file = {C:\\Users\\markus\\Zotero\\storage\\F886D4JS\\Choi et al. - 2018 - ChoiceNet Robust Learning by Revealing Output Cor.pdf;C:\\Users\\markus\\Zotero\\storage\\M6QSGCFL\\1805.html}
}
@article{lazaro-gredilla_overlapping_2011,
archivePrefix = {arXiv},
eprinttype = {arxiv},
eprint = {1108.3372},
primaryClass = {cs, stat},
title = {Overlapping {{Mixtures}} of {{Gaussian Processes}} for the {{Data Association Problem}}},
url = {http://arxiv.org/abs/1108.3372},
abstract = {In this work we introduce a mixture of GPs to address the data association problem, i.e. to label a group of observations according to the sources that generated them. Unlike several previously proposed GP mixtures, the novel mixture has the distinct characteristic of using no gating function to determine the association of samples and mixture components. Instead, all the GPs in the mixture are global and samples are clustered following "trajectories" across input space. We use a non-standard variational Bayesian algorithm to efficiently recover sample labels and learn the hyperparameters. We show how multi-object tracking problems can be disambiguated and also explore the characteristics of the model in traditional regression settings.},
urldate = {2018-08-06},
date = {2011-08-16},
keywords = {Statistics - Machine Learning,Computer Science - Artificial Intelligence,Computer Science - Machine Learning},
author = {Lázaro-Gredilla, Miguel and Van Vaerenbergh, Steven and Lawrence, Neil},
file = {C:\\Users\\markus\\Zotero\\storage\\9PWMT93W\\Lázaro-Gredilla et al. - 2011 - Overlapping Mixtures of Gaussian Processes for the.pdf;C:\\Users\\markus\\Zotero\\storage\\WXY7PLDG\\1108.html}
}
@article{maddison_concrete_2016,
archivePrefix = {arXiv},
eprinttype = {arxiv},
eprint = {1611.00712},
primaryClass = {cs, stat},
title = {The {{Concrete Distribution}}: {{A Continuous Relaxation}} of {{Discrete Random Variables}}},
url = {http://arxiv.org/abs/1611.00712},
shorttitle = {The {{Concrete Distribution}}},
abstract = {The reparameterization trick enables optimizing large scale stochastic computation graphs via gradient descent. The essence of the trick is to refactor each stochastic node into a differentiable function of its parameters and a random variable with fixed distribution. After refactoring, the gradients of the loss propagated by the chain rule through the graph are low variance unbiased estimators of the gradients of the expected loss. While many continuous random variables have such reparameterizations, discrete random variables lack useful reparameterizations due to the discontinuous nature of discrete states. In this work we introduce Concrete random variables---continuous relaxations of discrete random variables. The Concrete distribution is a new family of distributions with closed form densities and a simple reparameterization. Whenever a discrete stochastic node of a computation graph can be refactored into a one-hot bit representation that is treated continuously, Concrete stochastic nodes can be used with automatic differentiation to produce low-variance biased gradients of objectives (including objectives that depend on the log-probability of latent stochastic nodes) on the corresponding discrete graph. We demonstrate the effectiveness of Concrete relaxations on density estimation and structured prediction tasks using neural networks.},
urldate = {2018-09-12},
date = {2016-11-02},
keywords = {Computer Science - Machine Learning,Statistics - Machine Learning},
author = {Maddison, Chris J. and Mnih, Andriy and Teh, Yee Whye},
file = {C:\\Users\\markus\\Zotero\\storage\\4JCGSNBV\\Maddison et al. - 2016 - The Concrete Distribution A Continuous Relaxation.pdf;C:\\Users\\markus\\Zotero\\storage\\M4WZ8CC4\\1611.html}
}
@article{rezende_stochastic_2014,
langid = {english},
title = {Stochastic {{Backpropagation}} and {{Approximate Inference}} in {{Deep Generative Models}}},
url = {https://arxiv.org/abs/1401.4082},
urldate = {2018-09-12},
date = {2014-01-16},
author = {Rezende, Danilo Jimenez and Mohamed, Shakir and Wierstra, Daan},
file = {C:\\Users\\markus\\Zotero\\storage\\N244K3J2\\Rezende et al. - 2014 - Stochastic Backpropagation and Approximate Inferen.pdf;C:\\Users\\markus\\Zotero\\storage\\LEJMJNS8\\1401.html}
}
@incollection{kingma_variational_2015,
title = {Variational {{Dropout}} and the {{Local Reparameterization Trick}}},
url = {http://papers.nips.cc/paper/5666-variational-dropout-and-the-local-reparameterization-trick.pdf},
booktitle = {Advances in {{Neural Information Processing Systems}} 28},
publisher = {{Curran Associates, Inc.}},
urldate = {2018-09-12},
date = {2015},
pages = {2575--2583},
author = {Kingma, Diederik P and Salimans, Tim and Welling, Max},
editor = {Cortes, C. and Lawrence, N. D. and Lee, D. D. and Sugiyama, M. and Garnett, R.},
file = {C:\\Users\\markus\\Zotero\\storage\\89SIZL5F\\Kingma et al. - 2015 - Variational Dropout and the Local Reparameterizati.pdf;C:\\Users\\markus\\Zotero\\storage\\VE5EGL5C\\5666-variational-dropout-and-the-local-reparameterization-trick.html}
}
@article{tensorflow2015-whitepaper,
title = {{{TensorFlow}}: {{Large}}-{{Scale Machine Learning}} on {{Heterogeneous Systems}}},
url = {https://www.tensorflow.org/},
date = {2015},
author = {Abadi, Martín and Agarwal, Ashish and Barham, Paul and Brevdo, Eugene and Chen, Zhifeng and Citro, Craig and Corrado, Greg S. and Davis, Andy and Dean, Jeffrey and Devin, Matthieu and Ghemawat, Sanjay and Goodfellow, Ian and Harp, Andrew and Irving, Geoffrey and Isard, Michael and Jia, Yangqing and Jozefowicz, Rafal and Kaiser, Lukasz and Kudlur, Manjunath and Levenberg, Josh and Mané, Dandelion and Monga, Rajat and Moore, Sherry and Murray, Derek and Olah, Chris and Schuster, Mike and Shlens, Jonathon and Steiner, Benoit and Sutskever, Ilya and Talwar, Kunal and Tucker, Paul and Vanhoucke, Vincent and Vasudevan, Vijay and Viégas, Fernanda and Vinyals, Oriol and Warden, Pete and Wattenberg, Martin and Wicke, Martin and Yu, Yuan and Zheng, Xiaoqiang},
file = {C:\\Users\\markus\\Zotero\\storage\\WLKK3HI7\\Abadi et al. - 2015 - TensorFlow Large-Scale Machine Learning on Hetero.pdf},
note = {Software available from tensorflow.org}
}