
Add MDN and noise references

Markus Kaiser, 3 months ago
commit 7df4488028

3 changed files with 32 additions and 5 deletions:
  1. dynamic_dirichlet_deep_gp.tex (+5 -4)
  2. preamble/packages.tex (+1 -1)
  3. zotero_export.bib (+26 -0)

dynamic_dirichlet_deep_gp.tex (+5 -4)

@@ -48,17 +48,18 @@ This setup emulates an industrial system in which, for example due to wear or de
 In this setting, we want to recover both joint predictions marginalizing the current state of operation and informative models for these separate states.
 
 Estimating a model in this scenario is often referred to as a \emph{data association problem} \parencite{Bar-Shalom:1987, Cox93areview}, where both the different functions and the associations of the observations to a function need to be estimated.
-A simple example of this can be seen in \cref{fig:semi_bimodal:b}, where no single function could have generated the data.
+A simple example of this can be seen in \cref{fig:semi_bimodal}, where no single function could have generated the data.
 A slightly different view of the same problem is to consider the data to have been generated by a mixture of processes, where we are interested in factorising the data into these components~\parencite{choi_choicenet_2018}.
-The separation of underlying signal and a noise process is an application of the latter, where we consider certain observations to be noise and others to be signal\todo{noisy data reference?}.
+The separation of an underlying signal and a noise process is an application of the latter, where we consider certain observations to be noise and others to be signal~\parencite{rousseeuw_robust_2005,hodge_survey_2004}.
 
 Early approaches to explaining data using multiple generative processes are based on separating the input space and training local expert models explaining easier subtasks~\parencite{jacobs_adaptive_1991,tresp_mixtures_2001, rasmussen_infinite_2002}.
 The assignment of data points to local experts is handled by a gating network, which learns a function from the inputs to assignment probabilities.
 However, it is still a central assumption of these models that the underlying generative process is unimodal.
 That is, at every position in the input space, exactly one expert explains the data.
-On the input space as a whole, this induces non-stationary behaviour through the different experts.
+Another approach is presented in~\parencite{bishop_mixture_1994}, where multimodal regression tasks are interpreted as a density estimation problem.
+A high number of candidate distributions are reweighted to match the observed data without modelling the underlying generative process.
 
-In contrast, we are interested in a generative process where data at the same location in the input space could have been generated by multiple independent processes.
+In contrast, we are interested in a generative process where data at the same location in the input space could have been generated by a number of global independent processes.
 Inherently, the data association problem is ill-posed and requires assumptions on both the underlying functions and the association of the observations.
 In~\parencite{lazaro-gredilla_overlapping_2011}, the authors place Gaussian process priors on the different generative processes, which are assumed to be relevant globally.
 The associations are modelled via a latent association matrix and the model is trained using an EM algorithm.
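
The sentences added above cite \parencite{bishop_mixture_1994} for mixture density networks (MDNs). As a sketch of the model they summarize (the notation is illustrative and does not appear in the manuscript), an MDN turns multimodal regression into density estimation by letting a single network emit the parameters of a $K$-component mixture:

% Hedged sketch of the MDN predictive density following
% \parencite{bishop_mixture_1994}; notation is ours, not the paper's.
\begin{equation}
  p(y \mid x) = \sum_{k=1}^{K} \pi_k(x) \, \mathcal{N}\big(y \mid \mu_k(x), \sigma_k^2(x)\big),
  \qquad \sum_{k=1}^{K} \pi_k(x) = 1.
\end{equation}

The $K$ candidate distributions are thus reweighted at every input $x$ without an explicit model of which process generated each observation. The association-matrix approach of \parencite{lazaro-gredilla_overlapping_2011} in the surrounding context makes that assignment explicit instead; schematically, with binary indicators $Z_{nk}$ marking that observation $n$ was generated by the Gaussian process $f_k$,

% Schematic likelihood with a latent association matrix $\mathbf{Z}$;
% again an illustration, not the cited paper's exact formulation.
\begin{equation}
  p(\mathbf{y} \mid \{f_k\}, \mathbf{Z}) = \prod_{n=1}^{N} \prod_{k=1}^{K} \mathcal{N}\big(y_n \mid f_k(x_n), \sigma_k^2\big)^{Z_{nk}}.
\end{equation}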

preamble/packages.tex (+1 -1)

@@ -47,7 +47,7 @@
 \usepackage{url}
 \usepackage[capitalise, nameinlink, noabbrev]{cleveref}
 \crefformat{equation}{(#2#1#3)}
-\usepackage[style=numeric, backend=biber, url=false]{biblatex}
+\usepackage[style=numeric, backend=biber, url=false, maxcitenames=1]{biblatex}
 
 % Misc
 \usepackage{todonotes}
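
For reference, the new maxcitenames=1 option caps how many author names biblatex prints in name-rendering citation commands; with style=numeric, plain \cite is unaffected since it prints only the numeric label. A minimal illustration using an entry from the bibliography below (the rendered text is an assumption about default biblatex settings):

% Illustrative only: \textcite renders author names even in the numeric style.
\textcite{jacobs_adaptive_1991}
% with maxcitenames=1:  Jacobs et al. [n]
% without the option:   Jacobs, Jordan, Nowlan, and Hinton [n]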

zotero_export.bib (+26 -0)

@@ -13,6 +13,12 @@
   volume = {SMC-13}
 }
 
+@report{bishop_mixture_1994,
+  author = {Bishop, Christopher M.},
+  date = {1994},
+  title = {Mixture Density Networks}
+}
+
 @article{bodin_latent_2017,
   abstract = {We introduce Latent Gaussian Process Regression which is a latent variable extension allowing modelling of non-stationary processes using stationary GP priors. The approach is built on extending the input space of a regression problem with a latent variable that is used to modulate the covariance function over the input space. We show how our approach can be used to model non-stationary processes but also how multi-modal or non-functional processes can be described where the input signal cannot fully disambiguate the output. We exemplify the approach on a set of synthetic data and provide results on real data from geostatistics.},
   archivePrefix = {arXiv},
@@ -152,6 +158,16 @@
   volume = {38}
 }
 
+@article{hodge_survey_2004,
+  author = {Hodge, Victoria and Austin, Jim},
+  date = {2004},
+  journaltitle = {Artificial Intelligence Review},
+  number = {2},
+  pages = {85--126},
+  title = {A Survey of Outlier Detection Methodologies},
+  volume = {22}
+}
+
 @article{jacobs_adaptive_1991,
   author = {Jacobs, Robert A. and Jordan, Michael I. and Nowlan, Steven J. and Hinton, Geoffrey E.},
   date = {1991},
@@ -252,6 +268,14 @@
   urldate = {2018-09-12}
 }
 
+@book{rousseeuw_robust_2005,
+  author = {Rousseeuw, Peter J. and Leroy, Annick M.},
+  date = {2005},
+  publisher = {John Wiley \& Sons},
+  title = {Robust Regression and Outlier Detection},
+  volume = {589}
+}
+
 @article{salimbeni_doubly_2017,
   abstract = {Gaussian processes (GPs) are a good choice for function approximation as they are flexible, robust to over-fitting, and provide well-calibrated predictive uncertainty. Deep Gaussian processes (DGPs) are multi-layer generalisations of GPs, but inference in these models has proved challenging. Existing approaches to inference in DGP models assume approximate posteriors that force independence between the layers, and do not work well in practice. We present a doubly stochastic variational inference algorithm, which does not force independence between layers. With our method of inference we demonstrate that a DGP model can be used effectively on data ranging in size from hundreds to a billion points. We provide strong empirical evidence that our inference scheme for DGPs works well in practice in both classification and regression.},
   archivePrefix = {arXiv},
@@ -296,3 +320,5 @@
   url = {http://papers.nips.cc/paper/1900-mixtures-of-gaussian-processes.pdf},
   urldate = {2018-09-26}
 }
+
+
