\documentclass[beamer,tikz,crop]{standalone}
|
|
\input{preamble/tikz_standalone.tex}
|
|
\input{preamble/tikz_common.tex}
|
|
\input{preamble/tikz_style.tex}
|
|
\input{preamble/tikz_colors.tex}
|
|
\input{preamble/tikz_jumping.tex}
|
|
\input{../abbreviations.tex}
|
|
|
|
\begin{document}
|
|
\begin{tikzpicture}[stop jumping]
|
|
\begin{scope}[x=6em, y=8ex]
  % Unrolled graphical model. Rows, top to bottom (y = 2 ... -2):
  % theta_pi, a_t, s_t, r_t and J^pi.

  % --- Nodes ---

  % s row: s_0 is the only node of this row drawn as observed.
  \node[random variable, observed] (s0) at (0, 0) {$\mat{s}_0$};
  \foreach \idx/\xpos in {1/1, 2/2, T/3} {
    \node[random variable, latent] (s\idx) at (\xpos, 0) {$\mat{s}_{\idx}$};
  }

  % r row: one node directly below each s node.
  \foreach \idx/\xpos in {0/0, 1/1, 2/2, T/3} {
    \node[random variable, latent] (r\idx) at (\xpos, -1) {$\mat{r}_{\idx}$};
  }

  % a row: placed half-way between consecutive s nodes.
  % The node named (aT) carries the label a_{T-1}.
  \foreach \idx/\xpos/\sub in {0/0.5/0, 1/1.5/1, T/2.5/T-1} {
    \node[random variable, latent] (a\idx) at (\xpos, 1) {$\mat{a}_{\sub}$};
  }

  % Single parameter node above and single return node below the chain.
  \node[random variable, hyperparameter] (Pi) at (1.5, 2) {$\mat{\theta}_\pi$};
  \node[random variable, latent] (V) at (1.5, -2) {$\mat{J}^\pi$};

  % --- Edges ---
  % Dashed edges are the ones that bridge the gap towards the last time step;
  % within each group they are drawn last, as in the original ordering.

  % f and its outgoing edges are only shown on slides 1 and 2.
  \only<-2>{
    \node[random variable, observed] (f) at (-0.75, 0.5) {$\mat{f}$};

    \foreach \idx in {1, 2} {
      \draw[edge, directed] (f) -| (s\idx);
    }
    \draw[edge, directed, dashed] (f) -| (sT);
  }

  % theta_pi -> a_t (a1 sits straight below Pi, hence the plain line).
  \draw[edge, directed] (Pi) -| (a0);
  \draw[edge, directed] (Pi) -- (a1);
  \draw[edge, directed, dashed] (Pi) -| (aT);

  % s_t -> a_t
  \foreach \idx in {0, 1} {
    \draw[edge, directed] (s\idx) -- (a\idx);
  }
  \draw[edge, directed, dashed] (s2) -- (aT);

  % a_t -> s_{t+1}
  \foreach \src/\dst in {a0/s1, a1/s2, aT/sT} {
    \draw[edge, directed] (\src) -- (\dst);
  }

  % s_t -> s_{t+1}
  \foreach \src/\dst in {s0/s1, s1/s2} {
    \draw[edge, directed] (\src) -- (\dst);
  }
  \draw[edge, directed, dashed] (s2) -- (sT);

  % s_t -> r_t
  \foreach \idx in {0, 1, 2, T} {
    \draw[edge, directed] (s\idx) -- (r\idx);
  }

  % r_t -> J^pi
  \foreach \idx in {0, 1, 2} {
    \draw[edge, directed] (r\idx) |- (V);
  }
  \draw[edge, directed, dashed] (rT) |- (V);
\end{scope}
|
|
|
|
\only<1>{
  % Slide 1 only: three labelled plates drawn behind the graph.
  % Each plate is an empty node fitted around existing nodes; the quoted
  % string is the plate's label and the brace group styles/places that label.
  \begin{scope}[on background layer]
    % Plate around theta_pi, labelled above.
    \node[
      plate, inner xsep=15pt, inner ysep=4pt,
      sBlueDark,
      "Policy"{sBlueDark, above},
      fit=(Pi),
    ] {};

    % Plate spanning f, a_0 and s_T, labelled on the left.
    % (f) only exists on slides <= 2, which includes this slide.
    \node[
      plate, inner sep=5pt,
      sBlueDark,
      "Trajectory"{sBlueDark, left},
      fit=(f)(a0)(sT),
    ] {};

    % Plate around J^pi, labelled below.
    \node[
      plate, inner xsep=15pt, inner ysep=4pt,
      sBlueDark,
      "Value"{sBlueDark, below},
      fit=(V),
    ] {};
  \end{scope}
}
|
|
|
|
\only<2>{
  % Slide 2 only: a small, semi-transparent sub-model drawn to the upper left
  % of the main graph, plus a dashed callout shape connecting it to node (f).
  \begin{scope}[
    x=2.5em, y=5ex,
    xshift=-16em,
    yshift=5em,
    opacity=0.6,
  ]
    % Compact variant of the node style used in the main graph.
    % Defined inside the scope, so it is local to this inset.
    \tikzset{
      tiny random variable/.style={
        random variable, inner sep=0pt, minimum size=15pt, font=\tiny,
      },
    }

    % Inputs s_t and a_t: same coordinate, pushed apart by -2ex / +2ex.
    \node[tiny random variable, observed, xshift=-2ex] (Xsn) at (1, 0.25) {$\rv{s}_t$};
    \node[tiny random variable, observed, xshift=2ex] (Xan) at (1, 0.25) {$\rv{a}_t$};

    % (Xn) wraps both inputs so that edges can start from the pair.
    \begin{scope}[on background layer]
      \node[plate, opacity=0.6, fit=(Xsn)(Xan)] (Xn) {};
    \end{scope}

    % Output s_{t+1}.
    \node[tiny random variable, observed] (yn) at (.8, -3) {$\rv{s}_{t+1}$};

    % Nodes carrying the superscript \pix{k}.
    \node[tiny random variable, latent] (fnm) at (0, -1) {$\rv{f}_t^{\pix{k}}$};
    \node[tiny random variable, latent] (sigmanm) at (1, -1) {$\rv{\sigma}_t^{\pix{k}}$};
    \node[tiny random variable, latent] (ynm) at (0, -2) {$\rv{s}_{t+1}^{\pix{k}}$};
    \node[tiny random variable, latent] (alphanm) at (2, -1) {$\rv{\lambda}_t^{\pix{k}}$};

    % l_t, which feeds the output together with s_{t+1}^{(k)}.
    \node[tiny random variable, latent] (an) at (2, -3) {$\rv{l}_t$};

    \draw[edge, directed] (Xn) -| (fnm);
    \draw[edge, directed] (Xn) -- (sigmanm);
    \draw[edge, directed] (sigmanm) -- (ynm);
    \draw[edge, directed] (Xn) -| (alphanm);
    \draw[edge, directed] (fnm) -- (ynm);
    \draw[edge, directed] (alphanm) -- (an);
    \draw[edge, directed] (ynm) |- (yn);
    \draw[edge, directed] (an) -- (yn);

    % ---

    % Plate labelled "K" around f, lambda and s_{t+1}^{(k)}
    % (presumably a repetition over k -- confirm against the slides).
    \begin{scope}[on background layer]
      \node[
        yshift=3pt,
        plate, inner sep=5pt,
        opacity=0.6,
        fit=(fnm)(alphanm)(ynm),
        label={[font=\small, opacity=0.6, anchor=south east]south east:K}
      ] {};
    \end{scope}
  \end{scope}

  \begin{scope}[on background layer]
    % NOTE(mrksr): See https://tex.stackexchange.com/a/82282
    % Invisible bounding node around the whole inset ...
    \node[
      fit=(Xn)(fnm)(yn)(an),
      inner xsep=10pt, inner ysep=6pt,
    ] (fzoom) {};

    % ... whose corners, together with (f.west), form the callout outline.
    % The outline is closed with `cycle` alone; repeating (f.west) before
    % `cycle` would add a zero-length closing segment.
    \draw [very thick, dashed, sStoneLight, fill=sStone, fill opacity=.1]
      (f.west) --
      (fzoom.north east) --
      (fzoom.north west) --
      (fzoom.south west) --
      (fzoom.south east) --
      cycle
    ;
  \end{scope}
}
|
|
|
|
\only<3->{
  % From slide 3 onwards: equations placed to the left of the graph.
  % This node is outside the x=6em/y=8ex scope, so (-4, 0) is in default units.
  % Line 1: expected J^pi as a discounted sum of expected r_t.
  % Line 2: approximation averaging over P terms indexed by p.
  % Line 3: the same approximation for the gradient w.r.t. theta_pi.
  % theta_pi is written as \mat{\theta}_\pi throughout, matching its first use.
  % No blank lines inside the math: they would end the paragraph mid-formula.
  \node at (-4, 0) {$
    \begin{aligned}
      \Moment*{\E}{\Fun*{\mat{J}^\pi}{\mat{\theta}_\pi}}
        &= \sum_{t=0}^T \gamma^t \Moment*{\E_{\Prob{\mat{s}_t \given \mat{\theta}_\pi}}}{\mat{r}_t} \\
        % &\approx \sum_{t=0}^T \gamma^t \Moment*{\E_{\Variat{\mat{s}_t}}}{\mat{r}_t} \\
        &\approx \frac{1}{P} \sum_{p=1}^P \sum_{t=0}^T \gamma^t r_t^{\pix{p}} \\[3ex]
      \nabla \Fun*{\mat{J}^\pi}{\mat{\theta}_\pi}
        &\approx \frac{1}{P} \sum_{p=1}^P \sum_{t=0}^T \gamma^t \nabla_{\mat{\theta}_\pi} r_t^{\pix{p}}
    \end{aligned}
  $};
}
|
|
\end{tikzpicture}
|
|
\end{document}
|