Statistical Distributions

%matplotlib inline
import pandas as pd
import numpy as np
import scipy.stats as stats
import matplotlib.pyplot as plt

# Global matplotlib styling for every figure in this notebook: a 20 pt base
# font, with axes titles, axis labels and tick labels set to 'small'.
plt.rc('font', size=20.0)
plt.rc('axes', titlesize='small', labelsize='small')
plt.rc('xtick', labelsize='small')
plt.rc('ytick', labelsize='small')
def plot_continuous(dist):
    """Plot a sample histogram with the PDF, and the CDF, of a distribution.

    Parameters
    ----------
    dist : frozen continuous scipy.stats distribution
        Must provide ``rvs``, ``ppf``, ``pdf``, ``cdf`` and ``dist.name``.

    Returns
    -------
    tuple
        ``(fig, ax)``: the figure and its two axes (PDF left, CDF right).
    """
    fig, ax = plt.subplots(1, 2, sharex=True, figsize=(15, 5))

    # Histogram of 1000 random draws, normalised to unit area so that it
    # shares a scale with the PDF drawn on top of it. Raw counts would be
    # orders of magnitude larger than the density values.
    rvs = dist.rvs(size=1000)
    ax[0].hist(rvs, density=True, alpha=0.2, histtype='stepfilled')

    # Evaluate the curves on 50 points between the 1st and 99th percentiles.
    x = np.linspace(dist.ppf(0.01), dist.ppf(0.99), 50)
    ax[0].plot(x, dist.pdf(x), '-', lw=2)
    ax[0].set_title(dist.dist.name.title() + ' PDF')
    ax[0].set_ylabel('p(X=x)')

    # Plot cdf.
    ax[1].plot(x, dist.cdf(x), '-', lw=2)
    ax[1].set_title(dist.dist.name.title() + ' CDF')
    ax[1].set_ylabel('p(X<=x)')
    ax[1].set_xlabel('x')
    return (fig, ax)


def plot_discrete(dist):
    """Plot a sample histogram with the PMF, and the CDF, of a distribution.

    Parameters
    ----------
    dist : frozen discrete scipy.stats distribution
        Must provide ``rvs``, ``ppf``, ``pmf``, ``cdf`` and ``dist.name``.

    Returns
    -------
    tuple
        ``(fig, ax)``: the figure and its two axes (PMF left, CDF right).
    """
    fig, ax = plt.subplots(1, 2, sharex=True, figsize=(15, 5))

    # Histogram of 1000 random draws. Each sample is weighted by 1/N so the
    # bar heights are relative frequencies, comparable to the PMF.
    rvs = dist.rvs(size=1000)
    w = np.ones_like(rvs) / float(len(rvs))
    # Unit-width bins centred on every observed integer outcome. The default
    # 10 equal-width bins can merge two outcomes into one bar (or leave a bar
    # empty), which makes the bars disagree with the PMF markers.
    edges = np.arange(rvs.min(), rvs.max() + 2) - 0.5
    ax[0].hist(rvs, bins=edges, weights=w, alpha=0.2, histtype='stepfilled')

    # Plot pmf on the integers between the 1st and 99th percentiles.
    k = np.arange(dist.ppf(0.01), dist.ppf(0.99) + 1)
    ax[0].plot(k, dist.pmf(k), 'bo', lw=2)
    ax[0].set_title(dist.dist.name.title() + ' PMF')
    ax[0].set_ylabel('p(X=k)')

    # Plot cdf.
    ax[1].plot(k, dist.cdf(k), 'bo', lw=2)
    ax[1].set_title(dist.dist.name.title() + ' CDF')
    ax[1].set_ylabel('p(X<=k)')
    ax[1].set_xlabel('k')
    return (fig, ax)

Uniform (Continuous)

#Models

Equally likely outcomes in the interval from a to b, e.g. the angle in degrees between the hour and minute hands at a random time.

#Parameters

  • a minimum value.
  • b maximum value.
  • x observed value.
# Interval end points: a is the minimum value, b the maximum value.
a, b = 1, 2
# The interval is passed to scipy as a start (loc) and a width (scale).
uniform = stats.uniform(loc=a, scale=b - a)
plot = plot_continuous(uniform)


Exponential (Continuous)

#Models

Time between Poisson events, e.g. time until a taxi passes a street corner.

#Parameters

  • lambda average number of independent events per interval.
  • x observed time between events.
lam = 1  # lambda: average number of events per interval
# The rate is passed to scipy as scale = 1 / lambda. The float numerator
# keeps the division from truncating to an integer (under Python 2,
# 1/lam would be 0 for any integer lam > 1).
exponential = stats.expon(scale=1.0 / lam)
plot = plot_continuous(exponential)


Gaussian (Continuous)

#Models

A bell curve, e.g. IQ score.

#Parameters

  • mu mean or expectation.
  • sigma standard deviation.
  • x observed value.
# Standard normal: mean 0, standard deviation 1.
mu, sigma = 0, 1
gaussian = stats.norm(loc=mu, scale=sigma)
plot = plot_continuous(gaussian)


Bernoulli (Discrete)

#Models

One instance of a success or failure trial, e.g. (possibly unfair) coin toss.

#Parameters

  • p probability of success.
  • k observed outcome: failure (0) or success (1).
# Freeze a Bernoulli distribution with success probability 0.6 and plot
# its sampled histogram, PMF and CDF.
bernoulli = stats.bernoulli(p=0.6) 
plot = plot_discrete(bernoulli)

Geometric (Discrete)

#Models

Number of Bernoulli trials until first success, e.g. number of trials until coin flip turns out to be heads.

#Parameters

  • p probability of success (each trial).
  • k observed trials until success.
# Freeze a geometric distribution with per-trial success probability 0.5
# and plot its sampled histogram, PMF and CDF.
geometric = stats.geom(p=0.5) 
plot = plot_discrete(geometric)


Poisson (Discrete)

#Models

Number of events occurring in a fixed interval, e.g. number of taxis passing a street corner in a given hour (on avg. 10/hr).

#Parameters

  • lambda average number of independent events per interval.
  • k events observed in an interval.
lam = 4  # lambda: average number of events per interval
# The rate is passed to scipy's Poisson distribution as `mu`.
poisson = stats.poisson(mu=lam) 
plot = plot_discrete(poisson)

Binomial (Discrete)

#Models

Number of successes in a fixed number of independent Bernoulli trials (sampling with replacement), e.g. number of coin flips out of 100 that turn out to be heads.

#Parameters:

  • p probability of success (each trial).
  • n number of independent trials.
  • k observed number of successes.
# Ten independent trials, each succeeding with probability 0.6.
binomial = stats.binom(n=10, p=0.6)
plot = plot_discrete(binomial)

Weibull (Continuous)

#Models

Time between events when rate is not constant, e.g. time-to-failure when rate of failure increases or decreases over time.

Gamma (Continuous)

#Models

Waiting time until the k-th event of a Poisson process, i.e. the sum of k exponential waiting times. Used when accumulated waiting times or amounts are relevant, e.g. aggregate insurance claims or the amount of rainfall accumulated in a reservoir.

Hypergeometric (Discrete)

#Models

Number of successes out of a number of success-or-failure trials without replacement, e.g. number of times you draw a black ball from an urn of black and white balls without putting any back.