
Start variational formulation

arxiv
Markus Kaiser 2 years ago
commit 8645b4ad60
4 changed files with 30 additions and 18 deletions
  1. .latexmkrc (+1, -1)
  2. bayesian_warped_dependent_gp.tex (+21, -9)
  3. figures/graphical_model_generative.tex (+4, -4)
  4. figures/graphical_model_supervised.tex (+4, -4)

.latexmkrc (+1, -1)

@@ -1,4 +1,4 @@
@default_files = ("bayesian_warped_dependent_gp.tex");

$pdf_mode = 1;
$pdflatex="lualatex --shell-escape --interaction=nonstopmode %O %S";
$pdflatex="lualatex --shell-escape --file-line-error --interaction=nonstopmode %O %S";

bayesian_warped_dependent_gp.tex (+21, -9)

@@ -98,7 +98,7 @@ The final model is then given by
f_d(\mat{x}) &= \sum_{r=1}^R \int_\Omega T_{d,r}(\mat{x} - \mat{z}) \cdot u_r(\mat{z}) \diff \mat{z},
\end{split}
\end{align}
where $a_d$ and $g_d$ are the respective alignment and warping functions and $\mat{\epsilon_d} \sim \Gaussian{0, \sigma^2\Eye}$ is a noise term.
where $a_d$ and $g_d$ are the respective alignment and warping functions and $\mat{\epsilon_d} \sim \Gaussian{0, \sigma_{\epsilon, d}^2\Eye}$ is a noise term.
Because we assume independence between the two functions across outputs, we use Gaussian process priors of the form
\begin{align}
a_d &\sim \GP(\id, k_{a, d}), & g_d &\sim \GP(\id, k_{g, d}),
@@ -180,7 +180,7 @@ Their cross-covariance-terms closely resemble the original RBF kernel.
In order to allow for more flexibility, we added the alignment functions $a_d$ and the warpings $g_d$.
The alignment function (which we assume to be close to the identity function) models non-stationary local shifts between the different output functions, and the warpings allow the output functions to live on different scales and topologies, removing the constraint that the outputs must be linear combinations of the convolutions.
This model can be interpreted as a shared and warped latent variable model with a very specific prior:
The indices $\mat{X}$ are part of the prior for the latent space $a_d(\mat{X})$ and specify a sense of order for the different data points $\mat{y}$ which are augmented with uncertainty by the alignment functions.
The indices $\mat{X}$ are part of the prior for the latent space $a_d(\mat{X})$ and specify a sense of order for the different data points $\mat{y}$ which is augmented with uncertainty by the alignment functions.
\todo{More specifically: Does a linear alignment plus an RBF kernel yield the dynamic GP-LVM?} Using this order, the convolution processes enforce the covariance structure for the different data points specified by the smoothing kernels.

In contrast, \cref{fig:graphical_model_supervised} shows that the presented model can also be interpreted as a group of $D$ deep GPs with a layer with shared information between the different functions, i.e. a transfer learning setting.
@@ -191,13 +191,25 @@ Note that neither the index set nor the observations need to live in the same sp

\section{Variational approximation}
\label{sec:variational_approximation}
\begin{itemize}
\item SVGP
\item Nested Variational Compression
\item Psi-statistics for dependent GP
\item Maybe mention the possibility to do sampling like Hugh does?
\end{itemize}

Analogously to $\mat{y} = \left( \mat{y_1}, \dots, \mat{y_D} \right)$, we denote by $\rv{g}$, $\rv{f}$ and $\rv{a}$ the random vectors of size $ND$ containing the function values of the respective functions for all outputs.
The joint probability distribution of the model can then be written as
\begin{align}
\begin{split}
\MoveEqLeft[1]\Prob{\rv{y}, \rv{g}, \rv{f}, \rv{a} \given \mat{X}} = \\
&\Prob{\rv{f} \given \rv{a}} \cdot \prod_{d=1}^D \Prob{\rv{y_d} \given \rv{g_d}}\Prob{\rv{g_d} \given \rv{f_d}}\Prob{\rv{a_d} \given \rv{X}}
\end{split}
\end{align}
with
\begin{align*}
\rv{a_d} \mid \mat{X} &\sim \Gaussian{\mat{X}, \mat{K_{a, d}}}, \\
\rv{f} \mid \mat{a} &\sim \Gaussian{\mat{0}, \mat{K_f}}, \\
\rv{g_d} \mid \mat{f_d} &\sim \Gaussian{\mat{f_d}, \mat{K_{g, d}}}, \\
\rv{y_d} \mid \mat{g_d} &\sim \Gaussian{\mat{g_d}, \sigma^2_{\epsilon, d}\Eye}.
\end{align*}
Everything but the convolution processes factorizes over both the different levels of the model and the different outputs.
Direct inference is infeasible in this model: since all but the likelihood terms are Gaussian processes, the whole model can be interpreted as a specific deep Gaussian process, for which the required marginalizations are analytically intractable.
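To make the source of the intractability explicit, integrating the latent layers out of the factorization above (with the per-output products collapsed into the stacked vectors) yields the marginal likelihood
\begin{align}
\Prob{\rv{y} \given \mat{X}} = \int \Prob{\rv{y} \given \rv{g}} \Prob{\rv{g} \given \rv{f}} \Prob{\rv{f} \given \rv{a}} \Prob{\rv{a} \given \mat{X}} \diff \rv{a} \diff \rv{f} \diff \rv{g},
\end{align}
in which $\rv{a}$ and $\rv{f}$ enter the covariances of the subsequent layers through the kernel functions, so the corresponding integrals have no closed-form solution.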

To achieve computational tractability, we follow the approach of \citeauthor{hensman_nested_2014} \cite{hensman_nested_2014} and apply nested variational compression.
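The building block of this scheme is the standard sparse variational bound for a single GP layer, which the nested compression applies recursively through the hierarchy. As a sketch, writing the inducing variables of one layer as $\rv{v}$ (to avoid a clash with the latent processes $\rv{u_r}$) with variational distribution $q(\rv{v}) = \Gaussian{\mat{m}, \mat{S}}$, the single-layer bound for a likelihood $\Prob{\rv{y} \given \rv{f}}$ reads
\begin{align}
\begin{split}
\log \Prob{\rv{y} \given \mat{X}} &\geq \mathrm{E}_{q(\rv{f})}\!\left[ \log \Prob{\rv{y} \given \rv{f}} \right] - \mathrm{KL}\!\left[ q(\rv{v}) \,\|\, \Prob{\rv{v}} \right], \\
q(\rv{f}) &= \int \Prob{\rv{f} \given \rv{v}} \, q(\rv{v}) \diff \rv{v}.
\end{split}
\end{align}
Applying this construction to every layer $\rv{a_d}$, $\rv{f_d}$ and $\rv{g_d}$ removes the direct dependence of each layer on the exact function values of the layer below at the cost of additional variational parameters, which is what renders the nested model tractable.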

\section{Experiments}
\label{sec:experiments}

figures/graphical_model_generative.tex (+4, -4)

@@ -18,10 +18,10 @@
component direction=right, component sep=1ex,
nodes={latent},
] {U1/$\rv{u_1}$, U2/$\dots$[draw=none, fill=none], U3/$\rv{u_R}$};
X/$\rv{X_d}$[observed] ->[draw, directed] A/$\rv{A_d}$ ->[draw, directed] F/$\rv{F_d}$ -- P/$\rv{M^F_d}$[variational] -- Pp/$\rv{M^F_{d^\prime}}$[variational] -- Fp/$\rv{F_{d^\prime}}$ ->[draw, inverse directed] Ap/$\rv{A_{d^\prime}}$ ->[draw, inverse directed] Xp/$\rv{X_{d^\prime}}$[observed];
% M/$\rv{M^A_d}$[variational] -- G/$\rv{G_d}$ -- Q/$\rv{M^G_d}$[variational] -- Qp/$\rv{M^G_{d^\prime}}$[variational] -- Gp/$\rv{G_{d^\prime}}$ -- Mp/$\rv{M^A_{d^\prime}}$[variational];
M/$\rv{M^A_d}$[variational] -- G/$\rv{G_d}$ -- Q/$\rv{M^G_d}$[variational] -- Qp/$\rv{M^G_{d^\prime}}$[variational] -- Gp/$\rv{G_{d^\prime}}$ -- Mp/$\rv{M^A_{d^\prime}}$[variational];
Y/$\rv{Y_d}$[observed] -- /[draw=none] -- /[draw=none] -- Yp/$\rv{Y_{d^\prime}}$[observed];
X/$\rv{X_d}$[observed] ->[draw, directed] A/$\rv{a_d}$ ->[draw, directed] F/$\rv{f_d}$ -- P/$\rv{m^f_d}$[variational] -- Pp/$\rv{m^f_{d^\prime}}$[variational] -- Fp/$\rv{f_{d^\prime}}$ ->[draw, inverse directed] Ap/$\rv{a_{d^\prime}}$ ->[draw, inverse directed] Xp/$\rv{X_{d^\prime}}$[observed];
% M/$\rv{M^a_d}$[variational] -- G/$\rv{g_d}$ -- Q/$\rv{M^g_d}$[variational] -- Qp/$\rv{m^g_{d^\prime}}$[variational] -- Gp/$\rv{g_{d^\prime}}$ -- Mp/$\rv{m^a_{d^\prime}}$[variational];
M/$\rv{m^a_d}$[variational] -- G/$\rv{g_d}$ -- Q/$\rv{m^g_d}$[variational] -- Qp/$\rv{m^g_{d^\prime}}$[variational] -- Gp/$\rv{g_{d^\prime}}$ -- Mp/$\rv{m^a_{d^\prime}}$[variational];
Y/$\rv{y_d}$[observed] -- /[draw=none] -- /[draw=none] -- Yp/$\rv{y_{d^\prime}}$[observed];
};

\draw[edge, directed] (Us) -- (F);

figures/graphical_model_supervised.tex (+4, -4)

@@ -15,15 +15,15 @@
component sep=4em,
] {
{
M/$\rv{M^A_d}$[variational] --[draw=none] P/$\rv{M^F_d}$[variational] --[draw=none] Q/$\rv{M^G_d}$[variational],
X/$\rv{X_d}$[observed] -> A/$\rv{A_d}$ -> F/$\rv{F_d}$ -> G/$\rv{G_d}$ -> Y/$\rv{Y_d}$[observed],
M/$\rv{m^a_d}$[variational] --[draw=none] P/$\rv{m^f_d}$[variational] --[draw=none] Q/$\rv{m^g_d}$[variational],
X/$\rv{X_d}$[observed] -> A/$\rv{a_d}$ -> F/$\rv{f_d}$ -> G/$\rv{g_d}$ -> Y/$\rv{y_d}$[observed],
Us/""[draw, thick] //[
tree layout,
component direction=down, component sep=1ex,
nodes={latent},
] {U1/$\rv{u_1}$, U2/$\rvdots$[draw=none, fill=none], U3/$\rv{u_R}$},
Xp/$\rv{X_{d^\prime}}$[observed] -> Ap/$\rv{A_{d^\prime}}$ -> Fp/$\rv{F_{d^\prime}}$ -> Gp/$\rv{G_{d^\prime}}$ -> Yp/$\rv{Y_{d^\prime}}$[observed],
Mp/$\rv{M^A_{d^\prime}}$[variational] --[draw=none] Pp/$\rv{M^F_{d^\prime}}$[variational] --[draw=none] Qp/$\rv{M^G_{d^\prime}}$[variational],
Xp/$\rv{X_{d^\prime}}$[observed] -> Ap/$\rv{a_{d^\prime}}$ -> Fp/$\rv{f_{d^\prime}}$ -> Gp/$\rv{g_{d^\prime}}$ -> Yp/$\rv{y_{d^\prime}}$[observed],
Mp/$\rv{m^a_{d^\prime}}$[variational] --[draw=none] Pp/$\rv{m^f_{d^\prime}}$[variational] --[draw=none] Qp/$\rv{m^g_{d^\prime}}$[variational],
};
};

