0

I have been working with the following link, Fitting empirical distribution to theoretical ones with Scipy. I have been using my data and found out that the common distribution is the Hypsecant distribution. I couldn’t find the distribution in the pymc3 package, therefore, I decided to have a look with scipy to understand how the distribution is formed. I created a custom distribution for Hypsecant distribution in pymc3 and I have few questions:

  • I would like to know if my approach to creating the distribution is right?
  • How can I implement the custom distribution into models?
  • Regarding the prior distribution, do I use same steps in normal distribution priors (mu and sigma)?
  • How can I create functions for the random variable and logp?

My custom distribution:

import numpy as np
import theano.tensor as tt
from scipy import stats
from scipy.special import hyp1f1, nctdtr
import math
import warnings
from pymc3.theanof import floatX
from pymc3.distributions.dist_math import bound, gammaln
from pymc3.distributions.continuous import assert_negative_support, get_tau_sigma
from pymc3.distributions.distribution import Continuous, draw_values, generate_samples


class Hypsecant(Continuous):
    """
    Parameters
    ----------
    mu: float
        Location parameter.
    sigma: float
        Scale parameter (sigma > 0). Converges to the standard deviation as nu increases. (only required if lam is not specified)
    lam: float
        Scale parameter (lam > 0). Converges to the precision as nu increases. (only required if sigma is not specified)
    """

    def __init__(self, mu=0, sigma=1, lam=None, sd=None, *args, **kwargs):
        super().__init__(*args, **kwargs)
        super(Hypsecant, self).__init__(*args, **kwargs)
        if sd is not None:
            sigma = sd
            warnings.warn("sd is deprecated, use sigma instead", DeprecationWarning)
        lam, sigma = get_tau_sigma(tau=lam, sigma=sigma)
        self.lam = lam = tt.as_tensor_variable(lam)
        self.sigma = self.sd = sigma = tt.as_tensor_variable(sigma)
        self.mean = self.median = self.mode = self.mu = mu = tt.as_tensor_variable(mu)
        self.variance = tt.as_tensor_variable(1)

        assert_negative_support(lam, 'lam (sigma)', 'Hypsecant')

    def random(self, point=None, size=None):
        """
        Draw random values from Hypsecant distribution.
        Parameters
        ----------
        point: dict, optional
            Dict of variable values on which random values are to be
            conditioned (uses default point if not specified).
        size: int, optional
            Desired size of random sample (returns one sample if not
            specified).
        Returns
        -------
        array
        """
        mu, lam = draw_values([self.mu, self.lam], point=point, size=size)
        return generate_samples(stats.hypsecant.rvs, loc=mu, scale=lam, dist_shape=self.shape, size=size)

    def logp(self, value):
        """
        Calculate log-probability of Hypsecant distribution at specified value.
        Parameters
        ----------
        value: numeric
            Value(s) for which log-probability is calculated. If the log probabilities for multiple
            values are desired the values must be provided in a numpy array or theano tensor
        Returns
        -------
        TensorVariable
        """
        
        mu = self.mu
        lam = self.lam

        return bound()

    def logcdf(self, value):
        """
        Compute the log of the cumulative distribution function for Hypsecant distribution
        at the specified value.
        Parameters
        ----------
        value: numeric
            Value(s) for which log CDF is calculated. If the log CDF for multiple
            values are desired the values must be provided in a numpy array or theano tensor.
        Returns
        -------
        TensorVariable
        """
        mu = self.mu
        lam = self.lam
        
        return 2.0 / math.pi * tt.math.atan(tt.math.exp(value))

My Custom model:

with pm.Model() as model:
                # Prior Distributions for unknown model parameters:
                mu = pm.Normal('mu', mu=0, sigma=1)
                sd = pm.HalfNormal('sd', sigma=1)

                # Observed data is from a Likelihood distributions (Likelihood (sampling distribution) of observations):
                observed_data = Hypsecant('observed_data', mu=mu, sd=sd, observed=data_list)
                
                # draw 5000 posterior samples
                trace_S = pm.sample(draws=5000, tune=2000, chains=3, cores=1)
        
                # Obtaining Posterior Predictive Sampling:
                post_pred_S = pm.sample_posterior_predictive(trace_S, samples=3000)
                print(post_pred_S['observed_data'].shape)
                print('\nSummary: ')
                print(pm.stats.summary(data=trace_S))
                print(pm.stats.summary(data=post_pred_S))

Edit 1:

I edited the logp part and custom model. Please find the code below:

class Hypsecant(Continuous):
    """
    Parameters
    ----------
    mu: float
        Location parameter.
    sigma: float
        Scale parameter (sigma > 0). Converges to the standard deviation as nu increases. (only required if lam is not specified)
    lam: float
        Scale parameter (lam > 0). Converges to the precision as nu increases. (only required if sigma is not specified)
    """

    def __init__(self, mu=0, sigma=1, lam=None, sd=None, *args, **kwargs):
        super().__init__(*args, **kwargs)
        super(Hypsecant, self).__init__(*args, **kwargs)
        if sd is not None:
            sigma = sd
            warnings.warn("sd is deprecated, use sigma instead", DeprecationWarning)
        lam, sigma = get_tau_sigma(tau=lam, sigma=sigma)
        self.lam = lam = tt.as_tensor_variable(lam)
        self.sigma = self.sd = sigma = tt.as_tensor_variable(sigma)
        self.mean = self.median = self.mode = self.mu = mu = tt.as_tensor_variable(mu)
        self.variance = tt.as_tensor_variable(1)

        assert_negative_support(lam, 'lam (sigma)', 'Hypsecant')

    def random(self, point=None, size=None):
        """
        Draw random values from Hypsecant distribution.
        Parameters
        ----------
        point: dict, optional
            Dict of variable values on which random values are to be
            conditioned (uses default point if not specified).
        size: int, optional
            Desired size of random sample (returns one sample if not
            specified).
        Returns
        -------
        array
        """
        mu, lam = draw_values([self.mu, self.lam], point=point, size=size)
        return generate_samples(stats.hypsecant.rvs, loc=mu, scale=lam, dist_shape=self.shape, size=size)

    def logp(self, value):
        """
        Calculate log-probability of Hypsecant distribution at specified value.
        Parameters
        ----------
        value: numeric
            Value(s) for which log-probability is calculated. If the log probabilities for multiple
            values are desired the values must be provided in a numpy array or theano tensor
        Returns
        -------
        TensorVariable
        """
        
        mu = self.mu
        lam = self.lam

        Px = 1.0/(math.pi * tt.math.cosh(value))
        return bound(Px, mu, lam)

    def logcdf(self, value):
        """
        Compute the log of the cumulative distribution function for Hypsecant distribution
        at the specified value.
        Parameters
        ----------
        value: numeric
            Value(s) for which log CDF is calculated. If the log CDF for multiple
            values are desired the values must be provided in a numpy array or theano tensor.
        Returns
        -------
        TensorVariable
        """
        mu = self.mu
        lam = self.lam
        
        return 2.0 / math.pi * tt.math.atan(tt.math.exp(value))

Model:

with pm.Model() as model:
                # Prior Distributions for unknown model parameters:
                mu = pm.Normal('mu', mu=0, sigma=1)
                sd = pm.Normal('sd', mu=0, sigma=1)
                
                # Custom distribution:
                # observed_data = pm.DensityDist('observed_data', NonCentralStudentT, observed=data_list)

                # Observed data is from a Likelihood distributions (Likelihood (sampling distribution) of observations):
                observed_data = Hypsecant('observed_data', mu=mu, sd=sd, observed=data)
                
                # draw 5000 posterior samples
                trace = pm.sample(draws=5000, tune=3000, chains=2, cores=1)
        
                # Obtaining Posterior Predictive Sampling:
                post_pred = pm.sample_posterior_predictive(trace, samples=3000)
                print(post_pred['observed_data'].shape)
                print('\nSummary: ')
                print(pm.stats.summary(data=trace))
                print(pm.stats.summary(data=post_pred))

Edit 2:

I modified the class to the following:

class Hypsecant(Continuous):
    """
    Parameters
    ----------
    mu: float
        Location parameter.
    sigma: float
        Scale parameter (sigma > 0). Converges to the standard deviation as nu increases. (only required if lam is not specified)
    lam: float
        Scale parameter (lam > 0). Converges to the precision as nu increases. (only required if sigma is not specified)
    """

    def __init__(self, mu=None, sigma=None, lam=None, sd=None, *args, **kwargs):
        # super().__init__(*args, **kwargs)
        # super(Hypsecant, self).__init__(*args, **kwargs)
        if sd is not None:
            sigma = sd
            warnings.warn("sd is deprecated, use sigma instead", DeprecationWarning)
        lam, sigma = get_tau_sigma(tau=lam, sigma=sigma)
        self.lam = lam = tt.as_tensor_variable(lam)
        self.sigma = self.sd = sigma = tt.as_tensor_variable(sigma)
        self.mean = self.median = self.mode = self.mu = mu = tt.as_tensor_variable(mu)
        self.variance = tt.as_tensor_variable(1)

        assert_negative_support(mu, 'mu', 'Hypsecant')
        assert_negative_support(sigma, 'sigma (lam)', 'Hypsecant')
        
        # return super(Hypsecant, self).__init__(shape=[mu, sigma], *args, **kwargs)
        return super().__init__(*args, **kwargs)

    def random(self, point=None, size=None):
        """
        Draw random values from Hypsecant distribution.
        Parameters
        ----------
        point: dict, optional
            Dict of variable values on which random values are to be
            conditioned (uses default point if not specified).
        size: int, optional
            Desired size of random sample (returns one sample if not
            specified).
        Returns
        -------
        array
        """
        # mu = self.mu
        # lam, sigma = get_tau_sigma(tau=lam, sigma=sigma)
        
        mu, lam = draw_values([self.mu, self.lam], point=point, size=size)
        return generate_samples(stats.hypsecant.rvs, loc=mu, scale=lam, dist_shape=self.shape, size=size)

    def logp(self, value):
        """
        Calculate log-probability of Hypsecant distribution at specified value.
        Parameters
        ----------
        value: numeric
            Value(s) for which log-probability is calculated. If the log probabilities for multiple
            values are desired the values must be provided in a numpy array or theano tensor
        Returns
        -------
        TensorVariable
        """
        
        # mu = self.mu
        # lam = self.lam
        # sigma = self.sigma
        # # sd = self.sd
        # mu = self.mu
        # lam, sigma = get_tau_sigma(sigma=sigma)

        Px = pm.math.log(1.0/(math.pi * pm.math.cosh(value)))
        return bound(Px)
        # return bound(Px, mu, lam, sigma)

    def logcdf(self, value):
        """
        Compute the log of the cumulative distribution function for Hypsecant distribution
        at the specified value.
        Parameters
        ----------
        value: numeric
            Value(s) for which log CDF is calculated. If the log CDF for multiple
            values are desired the values must be provided in a numpy array or theano tensor.
        Returns
        -------
        TensorVariable
        """
        
        # # mu = self.mu
        # # lam = self.lam
        # # # self.sigma = self.sd = sigma = tt.as_tensor_variable(sigma)
        # mu = self.mu
        # lam = self.lam
        # sigma = self.sigma
        # # sd = self.sd
        # mu = self.mu
        # lam, sigma = get_tau_sigma(sigma=sigma)
        
        # return bound(pm.math.log(2.0 / math.pi * tt.math.atan(tt.math.exp(value))), mu, lam, sigma)
        return bound(pm.math.log(2.0 / math.pi * tt.math.atan(tt.math.exp(value))))
WDpad159
  • 179
  • 1
  • 12

0 Answers0