
Replace arXiv references with conference publications

Markus Kaiser, 3 months ago
parent commit ba69eac5ac
2 changed files with 36 additions and 58 deletions
  1. dynamic_dirichlet_deep_gp.tex (+3, -3)
  2. zotero_export.bib (+33, -55)

dynamic_dirichlet_deep_gp.tex (+3, -3)

@@ -61,7 +61,7 @@ A high number of candidate distributions are reweighed to match the observed dat
 
 In contrast, we are interested in a generative process, where data at the same location in the input space could have been generated by a number of global independent processes.
 Inherently, the data association problem is ill-posed and requires assumptions on both the underlying functions and the association of the observations.
-In~\parencite{lazaro-gredilla_overlapping_2011} the authors place Gaussian process priors on the different generative processes which are assumed to be relevant globally.
+In~\parencite{lazaro-gredilla_overlapping_2012} the authors place Gaussian process priors on the different generative processes which are assumed to be relevant globally.
 The associations are modelled via a latent association matrix and inference is done using an expectation maximization algorithm.
 This approach takes both the inputs and outputs of the training data into account to solve the association problem.
 A drawback is that the model cannot give a posterior estimate about the relevance of the different modes at different locations in the input space.
@@ -130,7 +130,7 @@ Since we assume the $K$ modes to be independent given the data and assignments,
 $\Prob*{\mat{F} \given \mat{X}} = \prod_{k=1}^K \Gaussian*{\mat{F^{\pix{k}}} \given \Fun*{\mu^{\pix{k}}}{\mat{X}}, \Fun*{k^{\pix{k}}}{\mat{X}, \mat{X}}}$.
 Our prior on the assignment process is composite.
 First, we assume that the $\mat{a_n}$ are drawn independently from multinomial distributions with logit parameters $\mat{\alpha_n} = \left( \alpha_n^{\pix{1}}, \ldots, \alpha_n^{\pix{K}} \right)$.
-A usual approach to specifying the $\mat{\alpha_n}$ is to assume them to be known a priori and to be equal for all data points~\parencite{lazaro-gredilla_overlapping_2011}.
+A usual approach to specifying the $\mat{\alpha_n}$ is to assume them to be known a priori and to be equal for all data points~\parencite{lazaro-gredilla_overlapping_2012}.
 Instead, we want to infer them from the data.
 Specifically, we assume that there is a relationship between the location in the input space $\mathbf{x}$ and the associations.
 By placing independent GP priors on $\mat{\alpha^{\pix{k}}}$, we can encode our prior knowledge of the associations by the choice of covariance function
@@ -157,7 +157,7 @@ Interdependencies between the data points are introduced through the Gaussian pr
 
 The priors for the $f^{\pix{k}}$ can be chosen independently to encode different prior assumptions about the modes.
 In \cref{subsec:choicenet} we use different kernels to separate a non-linear signal from a noise process.
-Going further, we can also use deep Gaussian processes as priors for the $f^{\pix{k}}$ \parencite{damianou_deep_2012, salimbeni_doubly_2017}.
+Going further, we can also use deep Gaussian processes as priors for the $f^{\pix{k}}$ \parencite{damianou_deep_2013, salimbeni_doubly_2017}.
 Since many real word systems are inherently hierarchical, prior knowledge can often be formulated more easily using composite functions \parencite{kaiser_bayesian_2017}.
 
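Note: the second hunk above touches the paper's composite assignment prior. For context, a minimal sketch of that construction in plain LaTeX notation, without the paper's \mat and \pix macros; the zero prior mean, the kernel symbol $k_\alpha^{(k)}$, and the softmax link from logits to multinomial probabilities are our shorthand assumptions, not taken verbatim from the source.

% Sketch only: assignment prior described in the hunk at line 130,
% written without the paper's custom macros (assumes amsmath).
\begin{align}
  \alpha^{(k)} &\sim \mathcal{GP}\bigl(0, k_\alpha^{(k)}\bigr),
    \qquad k = 1, \ldots, K, \\
  \mathbf{a}_n \mid \mathbf{x}_n &\sim \operatorname{Mult}\Bigl(
    \operatorname{softmax}\bigl(\alpha^{(1)}(\mathbf{x}_n), \ldots,
    \alpha^{(K)}(\mathbf{x}_n)\bigr)\Bigr).
\end{align}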

zotero_export.bib (+33, -55)

@@ -71,18 +71,18 @@
   title = {Robust Learning from Demonstration Using Leveraged {{Gaussian}} Processes and Sparse-Constrained Optimization}
 }
 
-@article{damianou_deep_2012,
-  abstract = {In this paper we introduce deep Gaussian process (GP) models. Deep GPs are a deep belief network based on Gaussian process mappings. The data is modeled as the output of a multivariate GP. The inputs to that Gaussian process are then governed by another GP. A single layer model is equivalent to a standard GP or the GP latent variable model (GP-LVM). We perform inference in the model by approximate variational marginalization. This results in a strict lower bound on the marginal likelihood of the model which we use for model selection (number of layers and nodes per layer). Deep belief networks are typically applied to relatively large data sets using stochastic gradient descent for optimization. Our fully Bayesian treatment allows for the application of deep models even when data is scarce. Model selection by our variational bound shows that a five layer hierarchy is justified even when modelling a digit data set containing only 150 examples.},
-  archivePrefix = {arXiv},
-  author = {Damianou, Andreas C. and Lawrence, Neil D.},
-  date = {2012-11-01},
-  eprint = {1211.0358},
-  eprinttype = {arxiv},
+@inproceedings{damianou_deep_2013,
+  abstract = {In this paper we introduce deep Gaussian process (GP) models. Deep GPs are a deep belief network based on Gaussian process mappings. The data is modeled as the output of a multivariate GP. The inpu...},
+  author = {Damianou, Andreas and Lawrence, Neil},
+  booktitle = {Artificial {{Intelligence}} and {{Statistics}}},
+  date = {2013-04-29},
+  eventtitle = {Artificial {{Intelligence}} and {{Statistics}}},
   keywords = {60G15; 58E30,Computer Science - Learning,G.1.2,G.3,I.2.6,Mathematics - Probability,Statistics - Machine Learning},
-  primaryClass = {cs, math, stat},
+  langid = {english},
+  pages = {207-215},
   title = {Deep {{Gaussian Processes}}},
-  url = {http://arxiv.org/abs/1211.0358},
-  urldate = {2016-09-05}
+  url = {http://proceedings.mlr.press/v31/damianou13a.html},
+  urldate = {2018-10-02}
 }
 
 @inproceedings{depeweg_decomposition_2018,
@@ -107,20 +107,6 @@
   urldate = {2016-06-06}
 }
 
-@article{hathaway_switching_1993,
-  abstract = {A family of objective functions called fuzzy c-regression models, which can be used too fit switching regression models to certain types of mixed data, is presented. Minimization of particular objective functions in the family yields simultaneous estimates for the parameters of c regression models, together with a fuzzy c-partitioning of the data. A general optimization approach for the family of objective functions is given and corresponding theoretical convergence results are discussed. The approach is illustrated by two numerical examples that show how it can be used to fit mixed data to coupled linear and nonlinear models},
-  author = {Hathaway, R. J. and Bezdek, J. C.},
-  date = {1993-08},
-  doi = {10.1109/91.236552},
-  issn = {1063-6706},
-  journaltitle = {IEEE Transactions on Fuzzy Systems},
-  keywords = {Clustering algorithms,Computer science,convergence,Convergence,convergence of numerical methods,Couplings,Covariance matrix,fuzzy c-regression models,fuzzy clustering,fuzzy set theory,Fuzzy sets,Linear approximation,Marine animals,minimisation,mixed data,objective functions,parameter estimation,Parameter estimation,statistical analysis,switching regression models,Yield estimation},
-  number = {3},
-  pages = {195-204},
-  title = {Switching Regression Models and Fuzzy Clustering},
-  volume = {1}
-}
-
 @inproceedings{hein_benchmark_2017,
   abstract = {In the research area of reinforcement learning (RL), frequently novel and promising methods are developed and introduced to the RL community. However, although many researchers are keen to apply their methods on real-world problems, implementing such methods in real industry environments often is a frustrating and tedious process. Generally, academic research groups have only limited access to real industrial data and applications. For this reason, new methods are usually developed, evaluated and compared by using artificial software benchmarks. On one hand, these benchmarks are designed to provide interpretable RL training scenarios and detailed insight into the learning process of the method on hand. On the other hand, they usually do not share much similarity with industrial real-world applications. For this reason we used our industry experience to design a benchmark which bridges the gap between freely available, documented, and motivated artificial benchmarks and properties of real industrial problems. The resulting industrial benchmark (IB) has been made publicly available to the RL community by publishing its Java and Python code, including an OpenAI Gym wrapper, on Github. In this paper we motivate and describe in detail the IB's dynamics and identify prototypic experimental settings that capture common situations in real-world industry control problems.},
   author = {Hein, D. and Depeweg, S. and Tokic, M. and Udluft, S. and Hentschel, A. and Runkler, T. A. and Sterzing, V.},
@@ -133,18 +119,14 @@
   title = {A Benchmark Environment Motivated by Industrial Control Problems}
 }
 
-@article{hensman_gaussian_2013,
-  abstract = {We introduce stochastic variational inference for Gaussian process models. This enables the application of Gaussian process (GP) models to data sets containing millions of data points. We show how GPs can be vari- ationally decomposed to depend on a set of globally relevant inducing variables which factorize the model in the necessary manner to perform variational inference. Our ap- proach is readily extended to models with non-Gaussian likelihoods and latent variable models based around Gaussian processes. We demonstrate the approach on a simple toy problem and two real world data sets.},
-  archivePrefix = {arXiv},
+@inproceedings{hensman_gaussian_2013,
   author = {Hensman, James and Fusi, Nicolo and Lawrence, Neil D.},
-  date = {2013-09-26},
-  eprint = {1309.6835},
-  eprinttype = {arxiv},
+  booktitle = {Uncertainty in {{Artificial Intelligence}}},
+  date = {2013},
   keywords = {Computer Science - Learning,Statistics - Machine Learning},
-  primaryClass = {cs, stat},
-  title = {Gaussian {{Processes}} for {{Big Data}}},
-  url = {http://arxiv.org/abs/1309.6835},
-  urldate = {2016-07-06}
+  pages = {282},
+  publisher = {{Citeseer}},
+  title = {Gaussian {{Processes}} for {{Big Data}}}
 }
 
 @article{hensman_scalable_2015,
@@ -192,7 +174,7 @@
   urldate = {2018-06-08}
 }
 
-@incollection{kingma_variational_2015,
+@inproceedings{kingma_variational_2015,
   author = {Kingma, Diederik P and Salimans, Tim and Welling, Max},
   booktitle = {Advances in {{Neural Information Processing Systems}} 28},
   date = {2015},
@@ -204,18 +186,15 @@
   urldate = {2018-09-12}
 }
 
-@article{lazaro-gredilla_overlapping_2011,
-  abstract = {In this work we introduce a mixture of GPs to address the data association problem, i.e. to label a group of observations according to the sources that generated them. Unlike several previously proposed GP mixtures, the novel mixture has the distinct characteristic of using no gating function to determine the association of samples and mixture components. Instead, all the GPs in the mixture are global and samples are clustered following "trajectories" across input space. We use a non-standard variational Bayesian algorithm to efficiently recover sample labels and learn the hyperparameters. We show how multi-object tracking problems can be disambiguated and also explore the characteristics of the model in traditional regression settings.},
-  archivePrefix = {arXiv},
-  author = {Lázaro-Gredilla, Miguel and Van Vaerenbergh, Steven and Lawrence, Neil},
-  date = {2011-08-16},
-  eprint = {1108.3372},
-  eprinttype = {arxiv},
+@article{lazaro-gredilla_overlapping_2012,
+  author = {Lázaro-Gredilla, Miguel and Van Vaerenbergh, Steven and Lawrence, Neil D.},
+  date = {2012},
+  journaltitle = {Pattern Recognition},
   keywords = {Computer Science - Artificial Intelligence,Computer Science - Machine Learning,Statistics - Machine Learning},
-  primaryClass = {cs, stat},
-  title = {Overlapping {{Mixtures}} of {{Gaussian Processes}} for the {{Data Association Problem}}},
-  url = {http://arxiv.org/abs/1108.3372},
-  urldate = {2018-08-06}
+  number = {4},
+  pages = {1386--1395},
+  title = {Overlapping Mixtures of {{Gaussian}} Processes for the Data Association Problem},
+  volume = {45}
 }
 
 @article{maddison_concrete_2016,
@@ -276,18 +255,17 @@
   volume = {589}
 }
 
-@article{salimbeni_doubly_2017,
-  abstract = {Gaussian processes (GPs) are a good choice for function approximation as they are flexible, robust to over-fitting, and provide well-calibrated predictive uncertainty. Deep Gaussian processes (DGPs) are multi-layer generalisations of GPs, but inference in these models has proved challenging. Existing approaches to inference in DGP models assume approximate posteriors that force independence between the layers, and do not work well in practice. We present a doubly stochastic variational inference algorithm, which does not force independence between layers. With our method of inference we demonstrate that a DGP model can be used effectively on data ranging in size from hundreds to a billion points. We provide strong empirical evidence that our inference scheme for DGPs works well in practice in both classification and regression.},
-  archivePrefix = {arXiv},
+@inproceedings{salimbeni_doubly_2017,
   author = {Salimbeni, Hugh and Deisenroth, Marc},
-  date = {2017-05-24},
-  eprint = {1705.08933},
-  eprinttype = {arxiv},
+  booktitle = {Advances in {{Neural Information Processing Systems}} 30},
+  date = {2017},
+  editor = {Guyon, I. and Luxburg, U. V. and Bengio, S. and Wallach, H. and Fergus, R. and Vishwanathan, S. and Garnett, R.},
   keywords = {Statistics - Machine Learning},
-  primaryClass = {stat},
+  pages = {4588--4599},
+  publisher = {{Curran Associates, Inc.}},
   title = {Doubly {{Stochastic Variational Inference}} for {{Deep Gaussian Processes}}},
-  url = {http://arxiv.org/abs/1705.08933},
-  urldate = {2017-06-02}
+  url = {http://papers.nips.cc/paper/7045-doubly-stochastic-variational-inference-for-deep-gaussian-processes.pdf},
+  urldate = {2018-10-02}
 }
 
 @article{tensorflow2015-whitepaper,

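The same pattern recurs across every entry this commit touches: an arXiv preprint record carrying eprint, eprinttype, archivePrefix, and primaryClass fields is swapped for the peer-reviewed venue's record with booktitle (or journaltitle), pages, and the publisher's URL, and citation keys are updated to the publication year. A minimal before/after sketch on a hypothetical entry; the key author_example_2013 and all field values below are placeholders, not taken from this repository:

% Before: arXiv preprint record (hypothetical placeholder entry).
@article{author_example_2013,
  archivePrefix = {arXiv},
  author = {Author, Ann},
  date = {2013-01-01},
  eprint = {1301.0000},
  eprinttype = {arxiv},
  primaryClass = {stat},
  title = {An Example Paper},
  url = {http://arxiv.org/abs/1301.0000}
}

% After: the published conference version of the same work.
@inproceedings{author_example_2013,
  author = {Author, Ann},
  booktitle = {Proceedings of an Example Conference},
  date = {2013},
  pages = {1-10},
  title = {An Example Paper},
  url = {http://proceedings.example.org/v1/author13.html}
}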