Source code for mcmcplot.utilities

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Mon May 14 06:24:12 2018

@author: prmiles
"""
import numpy as np
from scipy import pi, sin, cos
import sys
import math


[docs]def check_settings(default_settings, user_settings=None):
    '''
    Check user settings with default.

    Recursively checks elements of user settings against the defaults
    and updates settings as it goes.  If a user setting does not exist
    in the default, then the user setting is added to the settings.
    If the setting is defined in both the user and default settings,
    then the user setting overrides the default.  Otherwise, the default
    settings persist.

    Args:
        * **default_settings** (:py:class:`dict`): Default settings for \
        particular method.

    Kwargs:
        * **user_settings** (:py:class:`dict`): User defined settings. \
          Default: `None`

    Returns:
        * (:py:class:`dict`): Updated settings.
    '''
    settings = default_settings.copy()  # initially define settings as default
    options = list(default_settings.keys())  # get default settings
    if user_settings is None:  # convert to empty dict
        user_settings = {}
    user_options = list(user_settings.keys())  # get user settings
    for uo in user_options:  # iterate through settings
        if uo in options:
            # check if checking a dictionary
            if isinstance(settings[uo], dict):
                settings[uo] = check_settings(settings[uo], user_settings[uo])
            else:
                settings[uo] = user_settings[uo]
        if uo not in options:
            settings[uo] = user_settings[uo]
    return settings


[docs]def generate_subplot_grid(nparam=2):
    '''
    Generate subplot grid.

    For example, if `nparam` = 2, then the subplot will have
    2 rows and 1 column.

    Kwargs:
        * **nparam** (:py:class:`int`): Number of parameters. \
          Default: `2`

    Returns:
        * **ns1** (:py:class:`int`): Number of rows in subplot
        * **ns2** (:py:class:`int`): Number of columns in subplot
    '''
    ns1 = math.ceil(math.sqrt(nparam))
    ns2 = round(math.sqrt(nparam))
    return ns1, ns2


[docs]def generate_names(nparam, names):
    '''
    Generate parameter name set.

    For example, if `nparam` = 4, then the generated names are::

        names = ['p_{0}', 'p_{1}', 'p_{2}', 'p_{3}']

    Args:
        * **nparam** (:py:class:`int`): Number of parameter names to generate
        * **names** (:py:class:`list`): Names of parameters provided by user

    Returns:
        * **names** (:py:class:`list`): List of strings - parameter names
    '''
    # Check if names defined
    if names is None:
        names = generate_default_names(nparam)
    # Check if enough names defined
    if len(names) != nparam:
        names = extend_names_to_match_nparam(names, nparam)
    return names


[docs]def generate_default_names(nparam):
    '''
    Generate generic parameter name set.

    For example, if `nparam` = 4, then the generated names are::

        names = ['$p_{0}$', '$p_{1}$', '$p_{2}$', '$p_{3}$']

    Args:
        * **nparam** (:py:class:`int`): Number of parameter names to generate

    Returns:
        * **names** (:py:class:`list`): List of strings - parameter names
    '''
    names = []
    for ii in range(nparam):
        names.append(str('$p_{{{}}}$'.format(ii)))
    return names


[docs]def extend_names_to_match_nparam(names, nparam):
    '''
    Append names to list using default convention
    until length of names matches number of parameters.

    For example, if `names = ['name_1', 'name_2']` and `nparam = 4`, then
    two additional names will be appended to the `names` list.
    E.g.,::

        names = ['name_1', 'name_2', '$p_{2}$', '$p_{3}$']

    Args:
        * **names** (:py:class:`list`): Names of parameters provided by user
        * **nparam** (:py:class:`int`): Number of parameter names to generate

    Returns:
        * **names** (:py:class:`list`): List of strings - extended \
        list of parameter names
    '''
    if names is None:
        names = []
    n0 = len(names)
    for ii in range(n0, nparam):
        names.append(str('$p_{{{}}}$'.format(ii)))
    return names


# --------------------------------------------
[docs]def make_x_grid(x, npts=100):
    '''
    Generate x grid based on extrema.

    1. If `len(x) > 200`, then generates grid based on difference
    between the max and min values in the array.

    2. Otherwise, the grid is defined with respect to the array
    mean plus or minus four standard deviations.

    Args:
        * **x** (:class:`~numpy.ndarray`): Array of points

    Kwargs:
        * **npts** (:py:class:`int`): Number of points to use in \
          generated grid. Default: `100`

    Returns:
        * (:class:`~numpy.ndarray`): Uniformly spaced array of points \
        with shape :code:`(npts,1)`.
    '''
    xmin = min(x)
    xmax = max(x)
    xxrange = xmax-xmin
    if len(x) > 200:
        x_grid = np.linspace(xmin-0.08*xxrange, xmax+0.08*xxrange, npts)
    else:
        x_grid = np.linspace(np.mean(x)-4*np.std(x, ddof=1),
                             np.mean(x)+4*np.std(x, ddof=1), npts)
    return x_grid.reshape(x_grid.shape[0], 1)  # returns 1d column vector


# --------------------------------------------
# see MASS 2nd ed page 181.
[docs]def iqrange(x):
    '''
    Interquantile range of each column of x.

    Args:
        * **x** (:class:`~numpy.ndarray`): Array of points.

    Returns:
        * (:class:`~numpy.ndarray`): Interquantile range - single \
        element array, `q3 - q1`.
    '''
    nr, nc = x.shape
    if nr == 1:  # make sure it is a column vector
        x = x.reshape(nc, nr)
        nr = nc
        nc = 1
    # sort
    x.sort()
    i1 = math.floor((nr + 1)/4)
    i3 = math.floor(3/4*(nr+1))
    f1 = (nr+1)/4-i1
    f3 = 3/4*(nr+1)-i3
    q1 = (1-f1)*x[int(i1), :] + f1*x[int(i1)+1, :]
    q3 = (1-f3)*x[int(i3), :] + f3*x[int(i3)+1, :]
    return q3-q1


[docs]def gaussian_density_function(x, mu=0, sigma2=1):
    '''
    Standard normal/Gaussian density function.

    Args:
        * **x** (:py:class:`float`): Value of which to calculate probability.

    Kwargs:
        * **mu** (:py:class:`float`): Mean of Gaussian distribution. \
          Default: `0`
        * **sigma2** (:py:class:`float`): Variance of Gaussian \
          distribution. Default: `1`

    Returns:
        * **y** (:py:class:`float`): Likelihood of `x`.
    '''
    y = 1/math.sqrt(2*math.pi*sigma2)*math.exp(-0.5*(x-mu)**2/sigma2)
    return y


[docs]def scale_bandwidth(x):
    '''
    Scale bandwidth of array.

    Args:
        * **x** (:class:`~numpy.ndarray`): Array of points - column of chain.

    Returns:
        * **s** (:class:`~numpy.ndarray`): Scaled bandwidth - single \
        element array.
    '''
    n = len(x)
    if iqrange(x) <= 0:
        s = 1.06*np.array([np.std(x, ddof=1)*n**(-1/5)])
    else:
        s = 1.06*np.array([min(np.std(x, ddof=1), iqrange(x)/1.34)*n**(-1/5)])
    return s


# --------------------------------------------
[docs]def generate_ellipse(mu, cmat, ndp=100):
    '''
    Generates points for a probability contour ellipse

    Args:
        * **mu** (:class:`~numpy.ndarray`): Mean values
        * **cmat** (:class:`~numpy.ndarray`): Covariance matrix

    Kwargs:
        * **npd** (:py:class:`int`): Number of points to generate. \
          Default: `100`

    Returns:
        * **x** (:class:`~numpy.ndarray`): x-points
        * **y** (:class:`~numpy.ndarray`): y-points
    '''
    # check shape of covariance matrix
    if cmat.shape != (2, 2):
        sys.exit('covariance matrix must be 2x2')
    if check_symmetric(cmat) is not True:
        sys.exit('covariance matrix must be symmetric')
    # define t space
    t = np.linspace(0, 2*pi, ndp)
    pdflag, R = is_semi_pos_def_chol(cmat)
    if pdflag is False:
        sys.exit('covariance matrix must be positive definite')
    x = mu[0] + R[0, 0]*cos(t)
    y = mu[1] + R[0, 1]*cos(t) + R[1, 1]*sin(t)
    return x, y


[docs]def generate_ellipse_plot_points(x, y, ndp=100):
    '''
    Generates points for a probability contour ellipse for 2 columns of chain

    Args:
        * **x** (:class:`~numpy.ndarray`): chain 1
        * **y** (:class:`~numpy.ndarray`): chain 2

    Kwargs:
        * **npd** (:py:class:`int`): Number of points to generate. \
          Default: `100`

    Returns:
        * (:py:class:`dict`): 50% and 95% probability contours.
    '''
    c50 = 1.3863  # critical values from chisq(2) distribution
    c95 = 5.9915
    sig = np.cov(x.reshape(x.size,), y.reshape(y.size,))
    mu = np.mean(np.array([x, y]), axis=1)
    xe50, ye50 = generate_ellipse(mu, c50*sig, ndp=ndp)
    xe95, ye95 = generate_ellipse(mu, c95*sig, ndp=ndp)
    return {'xe50': xe50, 'ye50': ye50, 'xe95': xe95, 'ye95': ye95}


[docs]def check_symmetric(a, tol=1e-8):
    '''
    Check if array is symmetric by comparing with transpose.

    Args:
        * **a** (:class:`~numpy.ndarray`): Array to test.

    Kwargs:
        * **tol** (:py:class:`float`): Tolerance for testing equality. \
          Default: `1e-8`

    Returns:
        * (:py:class:`bool`): True -> symmetric, False -> not symmetric.
    '''
    return np.allclose(a, a.T, atol=tol)


[docs]def is_semi_pos_def_chol(x):
    '''
    Check if matrix is semi positive definite by calculating Cholesky
    decomposition.

    Args:
        * **x** (:class:`~numpy.ndarray`): Matrix to check

    Returns:
        * If matrix is `not` semi positive definite return :code:`False, None`
        * If matrix is semi positive definite return :code:`True` and the \
        Upper triangular form of the Cholesky decomposition matrix.
    '''
    c = None
    try:
        c = np.linalg.cholesky(x)
        return True, c.transpose()
    except np.linalg.linalg.LinAlgError:
        return False, c


[docs]def append_to_nrow_ncol_based_on_shape(sh, nrow, ncol):
    '''
    Append to list based on shape of array

    Args:
        * **sh** (:py:class:`tuple`): Shape of array.
        * **nrow** (:py:class:`list`): List of number of rows
        * **ncol** (:py:class:`list`): List of number of columns

    Returns:
        * **nrow** (:py:class:`list`): List of number of rows
        * **ncol** (:py:class:`list`): List of number of columns
    '''
    if len(sh) == 1:
        nrow.append(sh[0])
        ncol.append(1)
    else:
        nrow.append(sh[0])
        ncol.append(sh[1])
    return nrow, ncol


[docs]def setup_subsample(skip, maxpoints, nsimu):
    '''
    Setup subsampling from posterior.

    When plotting the sampling chain, it is often beneficial to subsample
    in order to avoid to dense of plots.  This routine determines the
    appropriate step size based on the size of the chain (nsimu) and maximum
    points allowed to plot (maxpoints).  The function checks if the
    size of the chain exceeds the maximum number of points allowed in the
    plot.  If yes, skip is defined such that every the max number of points
    are used and sampled evenly from the start to end of the chain.  Otherwise
    the value of skip is return as defined by the user.  A subsample index
    is then generated based on the value of skip and the number of simulations.

    Args:
        * **skip** (:py:class:`int`): User defined skip value.
        * **maxpoints** (:py:class:`int`): Maximum points allowed in each plot.

    Returns:
        * (:py:class:`int`): Skip value.
    '''
    if nsimu > maxpoints:
        skip = int(np.floor(nsimu/maxpoints))
    return np.arange(0, nsimu, skip)