Browse Source

Start variational formulation

arxiv
Markus Kaiser 1 year ago
parent
commit
8645b4ad60

+ 1
- 1
.latexmkrc View File

@@ -1,4 +1,4 @@
1 1
 @default_files = ("bayesian_warped_dependent_gp.tex");
2 2
 
3 3
 $pdf_mode = 1;
4
-$pdflatex="lualatex --shell-escape --interaction=nonstopmode %O %S";
4
+$pdflatex="lualatex --shell-escape --file-line-error --interaction=nonstopmode %O %S";

+ 21
- 9
bayesian_warped_dependent_gp.tex View File

@@ -98,7 +98,7 @@ The final model is then given by
98 98
     f_d(\mat{x}) &= \sum_{r=1}^R \int_\Omega T_{d,r}(\mat{x} - \mat{z}) \cdot u_r(\mat{z}) \diff \mat{z},
99 99
 \end{split}
100 100
 \end{align}
101
-where $a_d$ and $g_d$ are the respective alignment and warping functions and $\mat{\epsilon_d} \sim \Gaussian{0, \sigma^2\Eye}$ is a noise term.
101
+where $a_d$ and $g_d$ are the respective alignment and warping functions and $\mat{\epsilon_d} \sim \Gaussian{0, \sigma_{\epsilon, d}^2\Eye}$ is a noise term.
102 102
 Because we assume independence between the two functions across outputs, we use Gaussian process priors of the form
103 103
 \begin{align}
104 104
     a_d &\sim \GP(\id, k_{a, d}), & g_d &\sim \GP(\id, k_{g, d}),
@@ -180,7 +180,7 @@ Their cross-covariance-terms closely resemble the original RBF kernel.
180 180
 In order to allow for more flexibility, we added the alignment functions $a_d$ and the warpings $g_d$.
181 181
 The alignment function (which we assume to be close to the identity function) models non-stationary local shifts between the different output functions and the warpings allow for the output functions to live on different scales and topologies, removing the constraint that the function must be linear combinations of the convolutions.
182 182
 This model can be interpreted as a shared and warped latent variable model with a very specific prior:
183
-The indices $\mat{X}$ are part of the prior for the latent space $a_d(\mat{X})$ and specify a sense of order for the different data points $\mat{y}$ which are augmented with uncertainty by the alignment functions.
183
+The indices $\mat{X}$ are part of the prior for the latent space $a_d(\mat{X})$ and specify a sense of order for the different data points $\mat{y}$ which is augmented with uncertainty by the alignment functions.
184 184
 \todo{More specifically: Does a linear alignment plus an RBF kernel yield the dynamic GP-LVM?}Using this order, the convolution processes enforce the covariance structure for the different datapoints specified by the smoothing kernels.
185 185
 
186 186
 In contrast, \cref{fig:graphical_model_supervised} shows that the presented model can also be interpreted as a group of $D$ deep GPs with a layer with shared information between the different functions, i.e. a transfer learning setting.
@@ -191,13 +191,25 @@ Note that neither the index set nor the observations need to live in the same sp
191 191
 
192 192
 \section{Variational approximation}
193 193
 \label{sec:variational_approximation}
194
-\begin{itemize}
195
-    \item SVGP
196
-    \item Nested Variational Compression
197
-    \item Psi-statstics for dependent GP
198
-    \item Maybe mention the possibility to do sampling like Hugh does?
199
-\end{itemize}
200
-
194
+Analogously to $\mat{y} = \left( \mat{y_1}, \dots, \mat{y_D} \right)$, we denote as $\rv{g}$, $\rv{f}$ and $\rv{a}$ the random vectors of size $ND$ containing the function values of the respective functions and outputs.
195
+The joint probability distribution of the model can then be written as
196
+\begin{align}
197
+\begin{split}
198
+    \MoveEqLeft[1]\Prob{\rv{y}, \rv{g}, \rv{f}, \rv{a} \given \mat{X}} = \\
199
+    &\Prob{\rv{f} \given \rv{a}} \cdot \prod_{d=1}^D \Prob{\rv{y_d} \given \rv{g_d}}\Prob{\rv{g_d} \given \rv{f_d}}\Prob{\rv{a_d} \given \rv{X}}
200
+\end{split}
201
+\end{align}
202
+with
203
+\begin{align*}
204
+    \rv{a_d} \mid \mat{X} &\sim \Gaussian{\mat{X}, \mat{K_{a, d}}}, \\
205
+    \rv{f} \mid \mat{a} &\sim \Gaussian{\mat{0}, \mat{K_f}}, \\
206
+    \rv{g_d} \mid \mat{f_d} &\sim \Gaussian{\mat{f_d}, \mat{K_{g, d}}}, \\
207
+    \rv{y_d} \mid \mat{g_d} &\sim \Gaussian{\mat{g_d}, \sigma^2_{\epsilon, d}\Eye}.
208
+\end{align*}
209
+Everything but the convolutional processes factorizes over both the different levels of the model and the different outputs.
210
+Direct inference is infeasible in this model; since all but the likelihood terms are Gaussian processes, the whole model can be interpreted as a specific deep Gaussian process.
211
+
212
+To achieve computational tractability, we will follow along the lines of \citeauthor{hensman_nested_2014} \cite{hensman_nested_2014} and apply nested variational compression.
201 213
 
202 214
 \section{Experiments}
203 215
 \label{sec:experiments}

+ 4
- 4
figures/graphical_model_generative.tex View File

@@ -18,10 +18,10 @@
18 18
         component direction=right, component sep=1ex,
19 19
         nodes={latent},
20 20
         ] {U1/$\rv{u_1}$, U2/$\dots$[draw=none, fill=none], U3/$\rv{u_R}$};
21
-        X/$\rv{X_d}$[observed] ->[draw, directed] A/$\rv{A_d}$ ->[draw, directed] F/$\rv{F_d}$ -- P/$\rv{M^F_d}$[variational] -- Pp/$\rv{M^F_{d^\prime}}$[variational] -- Fp/$\rv{F_{d^\prime}}$ ->[draw, inverse directed] Ap/$\rv{A_{d^\prime}}$ ->[draw, inverse directed] Xp/$\rv{X_{d^\prime}}$[observed];
22
-        % M/$\rv{M^A_d}$[variational] -- G/$\rv{G_d}$ -- Q/$\rv{M^G_d}$[variational] -- Qp/$\rv{M^G_{d^\prime}}$[variational] -- Gp/$\rv{G_{d^\prime}}$ -- Mp/$\rv{M^A_{d^\prime}}$[variational];
23
-        M/$\rv{M^A_d}$[variational] -- G/$\rv{G_d}$ -- Q/$\rv{M^G_d}$[variational] -- Qp/$\rv{M^G_{d^\prime}}$[variational] -- Gp/$\rv{G_{d^\prime}}$ -- Mp/$\rv{M^A_{d^\prime}}$[variational];
24
-        Y/$\rv{Y_d}$[observed] -- /[draw=none] -- /[draw=none] -- Yp/$\rv{Y_{d^\prime}}$[observed];
21
+        X/$\rv{X_d}$[observed] ->[draw, directed] A/$\rv{a_d}$ ->[draw, directed] F/$\rv{f_d}$ -- P/$\rv{m^f_d}$[variational] -- Pp/$\rv{m^f_{d^\prime}}$[variational] -- Fp/$\rv{f_{d^\prime}}$ ->[draw, inverse directed] Ap/$\rv{a_{d^\prime}}$ ->[draw, inverse directed] Xp/$\rv{X_{d^\prime}}$[observed];
22
+        % M/$\rv{M^a_d}$[variational] -- G/$\rv{g_d}$ -- Q/$\rv{M^g_d}$[variational] -- Qp/$\rv{m^g_{d^\prime}}$[variational] -- Gp/$\rv{g_{d^\prime}}$ -- Mp/$\rv{m^a_{d^\prime}}$[variational];
23
+        M/$\rv{m^a_d}$[variational] -- G/$\rv{g_d}$ -- Q/$\rv{m^g_d}$[variational] -- Qp/$\rv{m^g_{d^\prime}}$[variational] -- Gp/$\rv{g_{d^\prime}}$ -- Mp/$\rv{m^a_{d^\prime}}$[variational];
24
+        Y/$\rv{y_d}$[observed] -- /[draw=none] -- /[draw=none] -- Yp/$\rv{y_{d^\prime}}$[observed];
25 25
     };
26 26
 
27 27
     \draw[edge, directed] (Us) -- (F);

+ 4
- 4
figures/graphical_model_supervised.tex View File

@@ -15,15 +15,15 @@
15 15
     component sep=4em,
16 16
     ] {
17 17
         {
18
-            M/$\rv{M^A_d}$[variational] --[draw=none] P/$\rv{M^F_d}$[variational] --[draw=none] Q/$\rv{M^G_d}$[variational],
19
-            X/$\rv{X_d}$[observed] -> A/$\rv{A_d}$ -> F/$\rv{F_d}$ -> G/$\rv{G_d}$ -> Y/$\rv{Y_d}$[observed],
18
+            M/$\rv{m^a_d}$[variational] --[draw=none] P/$\rv{m^f_d}$[variational] --[draw=none] Q/$\rv{m^g_d}$[variational],
19
+            X/$\rv{X_d}$[observed] -> A/$\rv{a_d}$ -> F/$\rv{f_d}$ -> G/$\rv{g_d}$ -> Y/$\rv{y_d}$[observed],
20 20
             Us/""[draw, thick] //[
21 21
                 tree layout,
22 22
                 component direction=down, component sep=1ex,
23 23
                 nodes={latent},
24 24
                 ] {U1/$\rv{u_1}$, U2/$\rvdots$[draw=none, fill=none], U3/$\rv{u_R}$},
25
-            Xp/$\rv{X_{d^\prime}}$[observed] -> Ap/$\rv{A_{d^\prime}}$ -> Fp/$\rv{F_{d^\prime}}$ -> Gp/$\rv{G_{d^\prime}}$ -> Yp/$\rv{Y_{d^\prime}}$[observed],
26
-            Mp/$\rv{M^A_{d^\prime}}$[variational] --[draw=none] Pp/$\rv{M^F_{d^\prime}}$[variational] --[draw=none] Qp/$\rv{M^G_{d^\prime}}$[variational],
25
+            Xp/$\rv{X_{d^\prime}}$[observed] -> Ap/$\rv{a_{d^\prime}}$ -> Fp/$\rv{f_{d^\prime}}$ -> Gp/$\rv{g_{d^\prime}}$ -> Yp/$\rv{y_{d^\prime}}$[observed],
26
+            Mp/$\rv{m^a_{d^\prime}}$[variational] --[draw=none] Pp/$\rv{m^f_{d^\prime}}$[variational] --[draw=none] Qp/$\rv{m^g_{d^\prime}}$[variational],
27 27
         };
28 28
     };
29 29
 

Loading…
Cancel
Save