More distribution objects#
import numpy as np
import matplotlib.pyplot as plt
from scipy import stats
from scipy.integrate import quad
from progressbar import progressbar as pbar
from rlxutils import subplots, copy_func
import pandas as pd
import seaborn as sns
import tensorflow as tf
import tensorflow_probability as tfp
tfd = tfp.distributions
tfb = tfp.bijectors
%matplotlib inline
2022-03-12 16:52:14.815510: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2022-03-12 16:52:14.815545: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.
# A batch of three independent Bernoulli distributions, one per success probability.
probs = np.array([0.8, 0.3, 0.5])
b = tfd.Bernoulli(probs=probs)
b
<tfp.distributions.Bernoulli 'Bernoulli' batch_shape=[3] event_shape=[] dtype=int32>
# Draw 100000 samples from the batch; the sample axis is prepended to the batch axis.
s = b.sample(100000).numpy()
s.shape
(100000, 3)
# Peek at the raw draws: one row per sample, one column per batch member.
s
array([[1, 1, 0],
[1, 1, 0],
[0, 0, 1],
...,
[0, 0, 1],
[0, 1, 0],
[1, 1, 0]], dtype=int32)
# Per-column fraction of ones; compare with the `probs` given to the Bernoulli.
s.mean(axis=0)
array([0.80058, 0.29994, 0.49951])
The Categorical distribution#
- The last dimension of `probs` is the number of classes.
- The rest of the dimensions form the `batch_shape`.
# Two categorical distributions over three classes, one per row.
# Note that the second row is not normalized (it sums to 20, not 1).
probs = np.array([
    [0.1, 0.4, 0.5],
    [10, 6, 4],
])
probs.shape
(2, 3)
# Each row of `probs` parameterizes one categorical distribution in the batch.
c = tfd.Categorical(probs=probs)
c
<tfp.distributions.Categorical 'Categorical' batch_shape=[2] event_shape=[] dtype=int32>
# One million draws of class indices from every distribution in the batch.
n_samples = 1_000_000
s = c.sample(n_samples).numpy()
print(s, "\n", s.shape)
[[2 0]
[1 2]
[1 1]
...
[1 0]
[1 1]
[2 1]]
(1000000, 2)
# Empirical class frequencies per batch member. They should approach the
# *normalized* probabilities, i.e. each row of `probs` divided by its sum
# (the second row, [10, 6, 4], was given unnormalized).
n_draws, n_batches = s.shape
freqs = pd.DataFrame(
    [pd.Series(s[:, i]).value_counts() / n_draws for i in range(n_batches)],
    index=[f"batch_{i}" for i in range(n_batches)],
).T
# value_counts() orders by frequency, so sort by category label to get a
# stable 0, 1, 2, ... x-axis regardless of which class is most frequent.
freqs.sort_index().plot(kind='bar')
plt.xlabel("category")
plt.grid();
# A categorical with two classes is equivalent to the Bernoulli above:
# each row holds (P(class 0), P(class 1)) for one batch member.
probs = np.array([
    [.2, .8],
    [.7, .3],
    [.5, .5],
])
c = tfd.Categorical(probs=probs)
print(c)
n_samples = 1_000_000
s = c.sample(n_samples).numpy()
print(s, s.shape)
tfp.distributions.Categorical("Categorical", batch_shape=[3], event_shape=[], dtype=int32)
[[1 0 1]
[1 0 0]
[1 0 1]
...
[0 1 0]
[0 0 0]
[0 0 1]] (1000000, 3)
# With class labels 0/1 the column mean is the fraction of draws equal to 1.
np.mean(s, axis=0)
array([0.799924, 0.300756, 0.499514])
A Mixture distribution#
- a `Categorical` distribution acts as a selector
- a list of distributions to select from
# Two-component Gaussian mixture. `mix` is the weight of the first component
# (centred at -1); the second component (centred at +1) gets 1 - mix.
mix = 0.3
selector = tfd.Categorical(probs=[mix, 1. - mix])
gaussians = [
    tfd.Normal(loc=-1, scale=0.3),
    tfd.Normal(loc=+1., scale=1.),
]
bimix_gauss = tfd.Mixture(cat=selector, components=gaussians, validate_args=True)
bimix_gauss
<tfp.distributions.Mixture 'Mixture' batch_shape=[] event_shape=[] dtype=float32>
# Draw 100000 scalar samples from the mixture.
s = bimix_gauss.sample(100000).numpy()
s.shape
(100000,)
# Overlay the mixture density (exp of log_prob) on the normalized sample
# histogram, evaluated on a 100-point grid spanning the observed sample range.
lo, hi = s.min(), s.max()
xr = np.linspace(lo, hi, 100)
density = np.exp(bimix_gauss.log_prob(xr))
plt.hist(s, bins=100, density=True, alpha=.5);
plt.plot(xr, density, color="black")
plt.grid();