Source code for limix_lmm.lmm_core

from time import time

from .util import calc_Ai_beta_s2, hatodot


[docs]class LMMCore:
    r"""
    Core LMM for interaction and association testing.

    The model is

    .. math::
        \mathbf{y}\sim\mathcal{N}(
        \underbrace{\mathbf{F}\mathbf{b}}_{\text{covariates}}+
        \underbrace{(\mathbf{X}\hat{\odot}\mathbf{I})\;
        \boldsymbol{\beta}}_{\text{genetics}},
        \sigma^2\underbrace{\mathbf{K}_{
        \boldsymbol{\theta}_0}}_{\text{covariance}})

    :math:`\hat{\odot}` is defined as follows.
    Let

    .. math::
        \mathbf{A}=\left[\mathbf{a}_1,
                         \dots,
                         \mathbf{a}_{m}
                         \right]\in\mathbb{R}^{n\times{m}}]

    and

    .. math::
        \mathbf{B}=\left[\mathbf{b}_1,
                         \dots,
                         \mathbf{b}_{l}
                         \right]\in\mathbb{R}^{n\times{l}}

    then

    .. math::
        \mathbf{A}\hat{\odot}\mathbf{B} =
         \left[\mathbf{a}_1\odot\mathbf{b}_1
               \dots,
               \mathbf{a}_1\odot\mathbf{b}_{l},
               \mathbf{a}_2\odot\mathbf{b}_1,
               \dots,
               \mathbf{a}_2\odot\mathbf{b}_{l},
               \dots,
               \mathbf{a}_{m}\odot\mathbf{b}_{l},
               \dots,\mathbf{a}_{m}\odot\mathbf{b}_{l}
               \right]\in\mathbb{R}^{n\times{(ml)}}

    where :math:`\odot` is the element-wise product.

    Importantly, :math:`\mathbf{K}_{\boldsymbol{\theta}_0}`
    is not re-estimated under the laternative model,
    i.e. only :math:`\sigma^2` is learnt.
    The covariance parameters :math:`\boldsymbol{\theta}_0`
    need to be learnt using another module, e.g. limix-core.
    Note that as a consequence of this,
    such an implementation of single-variant analysis does not require the
    specification of the whole covariance but just of a method that
    specifies the product of its inverse by a vector.

    The test :math:`\boldsymbol{\beta}\neq{0}` is done
    in bocks of ``step`` variants,
    where ``step`` can be specifed by the user.

    Parameters
    ----------
    y : (`N`, 1) ndarray
        phenotype vector
    F : (`N`, L) ndarray
        fixed effect design for covariates.
    Ki_dot : function
        method that takes an array and returns the dot product of
        the inverse of the covariance and the input array.


    Examples
    --------

        Example with Inter and step=1

        .. doctest::

            >>> from numpy.random import RandomState
            >>> import scipy as sp
            >>> from limix_lmm.lmm_core import LMMCore
            >>> from limix_core.gp import GP2KronSumLR
            >>> from limix_core.covar import FreeFormCov
            >>> random = RandomState(1)
            >>> from numpy import set_printoptions
            >>> set_printoptions(4)
            >>>
            >>> N = 100
            >>> k = 1
            >>> m = 2
            >>> S = 1000
            >>> y = random.randn(N,1)
            >>> E = random.randn(N,k)
            >>> G = 1.*(random.rand(N,S)<0.2)
            >>> F = sp.concatenate([sp.ones((N,1)), random.randn(N,1)], 1)
            >>> Inter = random.randn(N, m)
            >>>
            >>> gp = GP2KronSumLR(Y=y, Cn=FreeFormCov(1), G=E, F=F, A=sp.ones((1,1)))
            >>> gp.covar.Cr.setCovariance(0.5*sp.ones((1,1)))
            >>> gp.covar.Cn.setCovariance(0.5*sp.ones((1,1)))
            >>> info_null = gp.optimize(verbose=False)
            >>>
            >>> lmm = LMMCore(y, F, gp.covar.solve)
            >>> lmm.process(G, Inter)
            >>> pv = lmm.getPv()
            >>> beta = lmm.getBetaSNP()
            >>> beta_ste = lmm.getBetaSNPste()
            >>> lrt = lmm.getLRT()
            >>>
            >>> print(pv.shape)
            (1000,)
            >>> print(beta.shape)
            (2, 1000)
            >>> print(pv[:4])
            [0.1428 0.3932 0.4749 0.3121]
            >>> print(beta[:,:4])
            [[ 0.0535  0.2571  0.122  -0.2328]
             [ 0.3418  0.3179 -0.2099  0.0986]]

        Example with step=4

        .. doctest::

            >>> lmm.process(G, step=4)
            >>> pv = lmm.getPv()
            >>> beta = lmm.getBetaSNP()
            >>> lrt = lmm.getLRT()
            >>>
            >>> print(pv.shape)
            (250,)
            >>> print(beta.shape)
            (4, 250)
            >>> print(pv[:4])
            [0.636  0.3735 0.7569 0.3282]
            >>> print(beta[:,:4])
            [[-0.0176 -0.4057  0.0925 -0.4175]
             [ 0.2821  0.2232 -0.2124  0.2433]
             [-0.0575  0.1528 -0.0384  0.019 ]
             [-0.2156 -0.2327 -0.1773 -0.1497]]

        Example with step=4 and Inter

        .. doctest::

            >>> lmm = LMMCore(y, F, gp.covar.solve)
            >>> lmm.process(G, Inter=Inter, step=4)
            >>> pv = lmm.getPv()
            >>> beta = lmm.getBetaSNP()
            >>> lrt = lmm.getLRT()
            >>>
            >>> print(pv.shape)
            (250,)
            >>> print(beta.shape)
            (8, 250)
            >>> print(pv[:4])
            [0.1591 0.2488 0.4109 0.5877]
            >>> print(beta[:,:4])
            [[ 0.0691  0.1977  0.435  -0.3214]
             [ 0.1701 -0.3195  0.0179  0.3344]
             [ 0.1887 -0.0765 -0.3847  0.1843]
             [-0.2974  0.2787  0.2427 -0.0717]
             [ 0.3784  0.0854  0.1566  0.0652]
             [ 0.4012  0.5255 -0.1572  0.1674]
             [-0.413   0.0278  0.1946 -0.1199]
             [ 0.0268 -0.0317 -0.1059  0.1414]]
    """

    def __init__(self, y, F, Ki_dot=None):
        import scipy as sp

        if F is None:
            F = sp.ones((y.shape[0], 1))
        self.y = y
        self.F = F
        self.Ki_dot = Ki_dot
        self.df = y.shape[0] - F.shape[1]
        self._fit_null()

    def _fit_null(self):
        """ Internal functon. Fits the null model """
        import scipy as sp

        if self.Ki_dot is None:
            self.Kiy = self.y
            self.KiF = self.F
        else:
            self.Kiy = self.Ki_dot(self.y)
            self.KiF = self.Ki_dot(self.F)
        self.FKiy = sp.dot(self.F.T, self.Kiy)
        self.FKiF = sp.dot(self.F.T, self.KiF)
        self.yKiy = sp.dot(self.y[:, 0], self.Kiy[:, 0])
        # calc beta_F0 and s20
        self.A0i, self.beta_F0, self.s20 = calc_Ai_beta_s2(
            self.yKiy, self.FKiF, self.FKiy, self.df
        )

[docs]    def process(self, G, Inter=None, step=1, verbose=False):
        r"""
        Fit genotypes one-by-one.

        Parameters
        ----------
        G : (`N`, `S`) ndarray
        Inter : (`N`, `M`) ndarray
            Matrix of `M` factors for `N` inds with which
            each variant interact
            By default, Inter is set to a matrix of ones.
        step : int
            Number of consecutive variants that should be
            tested jointly.
        verbose : bool
            verbose flag.
        """
        import scipy as sp
        import scipy.stats as st

        t0 = time()
        ntests = int(G.shape[1] / step)
        if Inter is None:
            mi = 1
        else:
            mi = Inter.shape[1]
        k = self.F.shape[1]
        m = mi * step
        F1KiF1 = sp.zeros((k + m, k + m))
        F1KiF1[:k, :k] = self.FKiF
        F1Kiy = sp.zeros((k + m, 1))
        F1Kiy[:k, 0] = self.FKiy[:, 0]
        s2 = sp.zeros(ntests)
        self.beta_g = sp.zeros([m, ntests])
        for s in range(ntests):
            idx1 = step * s
            idx2 = step * (s + 1)
            if Inter is not None:
                if step == 1:
                    X = Inter * G[:, idx1:idx2]
                else:
                    X = hatodot(Inter, G[:, idx1:idx2])
            else:
                X = G[:, idx1:idx2]
            if self.Ki_dot is None:
                KiX = X
            else:
                KiX = self.Ki_dot(X)
            F1KiF1[k:, :k] = sp.dot(X.T, self.KiF)
            F1KiF1[:k, k:] = F1KiF1[k:, :k].T
            F1KiF1[k:, k:] = sp.dot(X.T, KiX)
            F1Kiy[k:, 0] = sp.dot(X.T, self.Kiy[:, 0])
            # this can be sped up by using block matrix inversion, etc
            _, beta, s2[s] = calc_Ai_beta_s2(self.yKiy, F1KiF1, F1Kiy, self.df)
            self.beta_g[:, s] = beta[k:, 0]
        # dlml and pvs
        self.lrt = -self.df * sp.log(s2 / self.s20)
        self.pv = st.chi2(m).sf(self.lrt)

        t1 = time()
        if verbose:
            print("Tested for %d variants in %.2f s" % (G.shape[1], t1 - t0))

[docs]    def getPv(self):
        """
        Get pvalues

        Returns
        -------
        pv : ndarray
        """
        return self.pv

[docs]    def getBetaSNP(self):
        """
        get effect size SNPs


        Returns
        -------
        pv : ndarray
        """
        return self.beta_g

[docs]    def getBetaCov(self):
        """
        get beta of covariates

        Returns
        -------
        beta : ndarray
        """
        return self.beta_F

[docs]    def getLRT(self):
        """
        get lik ratio test statistics

        Returns
        -------
        lrt : ndarray
        """
        return self.lrt

[docs]    def getBetaSNPste(self):
        """
        get standard errors on betas

        Returns
        -------
        beta_ste : ndarray
        """
        import scipy as sp
        import scipy.stats as st

        beta = self.getBetaSNP()
        pv = self.getPv()
        z = sp.sign(beta) * sp.sqrt(st.chi2(1).isf(pv))
        ste = beta / z
        return ste