Source code for queens.variational_distributions.full_rank_normal

#
# SPDX-License-Identifier: LGPL-3.0-or-later
# Copyright (c) 2024-2025, QUEENS contributors.
#
# This file is part of QUEENS.
#
# QUEENS is free software: you can redistribute it and/or modify it under the terms of the GNU
# Lesser General Public License as published by the Free Software Foundation, either version 3 of
# the License, or (at your option) any later version. QUEENS is distributed in the hope that it will
# be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. You
# should have received a copy of the GNU Lesser General Public License along with QUEENS. If not,
# see <https://www.gnu.org/licenses/>.
#
"""Full-Rank Normal Variational Distribution."""

import numpy as np
import scipy
from numba import njit

from queens.utils.logger_settings import log_init_args
from queens.variational_distributions._variational_distribution import Variational


class FullRankNormal(Variational):
    r"""Full-rank multivariate normal distribution.

    Uses the parameterization (as in [1]) :math:`parameters=[\mu, \lambda]`, where
    :math:`\mu` are the mean values and :math:`\lambda` is an array containing the nonzero
    entries of the lower Cholesky decomposition of the covariance matrix :math:`L`:
    :math:`\lambda=[L_{00},L_{10},L_{11},L_{20},L_{21},L_{22}, ...]`. This allows the
    parameters :math:`\lambda` to be unconstrained.

    References:
        [1]: Kucukelbir, Alp, et al. "Automatic differentiation variational inference."
             The Journal of Machine Learning Research 18.1 (2017): 430-474.

    Attributes:
        n_parameters (int): Number of parameters used in the parameterization.
    """

    @log_init_args
    def __init__(self, dimension):
        """Initialize variational distribution.

        Args:
            dimension (int): dimension of the RV
        """
        super().__init__(dimension)
        self.n_parameters = (dimension * (dimension + 1)) // 2 + dimension

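    # A minimal sketch (not part of the original source) of the parameter layout:
    # for dimension 2 the Cholesky factor has 3 nonzero entries, so
    # n_parameters = 2 + 3 = 5 and the parameter vector is ordered as
    #
    #     dist = FullRankNormal(dimension=2)
    #     dist.n_parameters  # -> 5
    #     # variational_parameters = [mu_0, mu_1, L_00, L_10, L_11]
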
    def initialize_variational_parameters(self, random=False):
        r"""Initialize variational parameters.

        Default initialization:
            :math:`\mu=0` and :math:`L=diag(1)` where :math:`\Sigma=LL^T`

        Random initialization:
            :math:`\mu=Uniform(-0.1,0.1)` and :math:`L=diag(Uniform(0.9,1.1))` where
            :math:`\Sigma=LL^T`

        Args:
            random (bool, optional): If True, a random initialization is used. Otherwise the
                                     default is selected

        Returns:
            variational_parameters (np.ndarray): variational parameters (1 x n_params)
        """
        if random:
            # Random diagonal perturbation of the identity, matching the documented
            # initialization L = diag(Uniform(0.9, 1.1))
            cholesky_covariance = np.eye(self.dimension) + np.diag(
                np.random.uniform(-0.1, 0.1, self.dimension)
            )
            variational_parameters = np.random.uniform(-0.1, 0.1, self.dimension)
            for j in range(len(cholesky_covariance)):
                variational_parameters = np.hstack(
                    (variational_parameters, cholesky_covariance[j, : j + 1])
                )
        else:
            mean = np.zeros(self.dimension)
            # Flattened identity Cholesky factor, i.e. the documented default L = diag(1)
            cholesky = np.eye(self.dimension)[np.tril_indices(self.dimension)]
            variational_parameters = np.concatenate([mean, cholesky])

        return variational_parameters

    @staticmethod
    def construct_variational_parameters(mean, covariance):
        """Construct the variational parameters from mean and covariance.

        Args:
            mean (np.ndarray): Mean values of the distribution (n_dim x 1)
            covariance (np.ndarray): Covariance matrix of the distribution (n_dim x n_dim)

        Returns:
            variational_parameters (np.ndarray): Variational parameters
        """
        if len(mean) == len(covariance):
            cholesky_covariance = np.linalg.cholesky(covariance)
            variational_parameters = mean.flatten()
            for j in range(len(cholesky_covariance)):
                variational_parameters = np.hstack(
                    (variational_parameters, cholesky_covariance[j, : j + 1])
                )
        else:
            raise ValueError(
                f"Dimension of the mean value {len(mean)} does not equal covariance dimension "
                f"{covariance.shape}"
            )
        return variational_parameters

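    # Illustrative sketch (hypothetical values, not part of the original source):
    # constructing parameters from a given mean and covariance.
    #
    #     mean = np.array([1.0, 2.0])
    #     cov = np.array([[2.0, 0.5], [0.5, 1.0]])
    #     params = FullRankNormal.construct_variational_parameters(mean, cov)
    #     # -> approx. [1.0, 2.0, 1.4142, 0.3536, 0.9354],
    #     # i.e. the mean followed by the rows of np.linalg.cholesky(cov)
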
    def reconstruct_distribution_parameters(self, variational_parameters, return_cholesky=False):
        """Reconstruct mean value, covariance and its Cholesky decomposition.

        Args:
            variational_parameters (np.ndarray): Variational parameters
            return_cholesky (bool, optional): If True, also return the Cholesky factor L

        Returns:
            mean (np.ndarray): Mean value of the distribution (n_dim x 1)
            cov (np.ndarray): Covariance of the distribution (n_dim x n_dim)
            L (np.ndarray): Cholesky decomposition of the covariance matrix (n_dim x n_dim)
        """
        mean = variational_parameters[: self.dimension].reshape(-1, 1)
        cholesky_covariance_array = variational_parameters[self.dimension :]
        cholesky_covariance = np.zeros((self.dimension, self.dimension))
        idx = np.tril_indices(self.dimension, k=0, m=self.dimension)
        cholesky_covariance[idx] = cholesky_covariance_array
        cov = np.matmul(cholesky_covariance, cholesky_covariance.T)

        if return_cholesky:
            return mean, cov, cholesky_covariance

        return mean, cov

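    # Round-trip sketch (continuing the hypothetical example above, not part of
    # the original source):
    #
    #     dist = FullRankNormal(dimension=2)
    #     m, c = dist.reconstruct_distribution_parameters(params)
    #     np.allclose(m.flatten(), mean)  # True; m has shape (2, 1)
    #     np.allclose(c, cov)             # True
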
    def _grad_reconstruct_distribution_parameters(self):
        """Gradient of the parameter reconstruction.

        Returns:
            grad_reconstruct_params (np.ndarray): Gradient vector of the reconstruction
                                                  w.r.t. the variational parameters
        """
        grad_reconstruct_params = np.ones((1, self.n_parameters))
        return grad_reconstruct_params

    def draw(self, variational_parameters, n_draws=1):
        """Draw *n_draws* samples from the variational distribution.

        Args:
            variational_parameters (np.ndarray): Variational parameters
            n_draws (int): Number of samples to draw

        Returns:
            samples (np.ndarray): Row-wise samples of the variational distribution
        """
        mean, _, cholesky = self.reconstruct_distribution_parameters(
            variational_parameters, return_cholesky=True
        )
        samples = np.dot(cholesky, np.random.randn(self.dimension, n_draws)).T + mean.reshape(
            1, -1
        )
        return samples

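    # Sampling sketch (continuing the hypothetical example, not part of the
    # original source): for many draws, the empirical moments approach the
    # distribution parameters.
    #
    #     samples = dist.draw(params, n_draws=10000)  # shape (10000, 2)
    #     samples.mean(axis=0)  # approx. mean
    #     np.cov(samples.T)     # approx. cov
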
    def logpdf(self, variational_parameters, x):
        """Logpdf evaluated at given samples *x*.

        Args:
            variational_parameters (np.ndarray): Variational parameters
            x (np.ndarray): Row-wise samples

        Returns:
            logpdf (np.ndarray): Row vector of the logpdfs
        """
        mean, cov, cholesky = self.reconstruct_distribution_parameters(
            variational_parameters, return_cholesky=True
        )
        x = np.atleast_2d(x)
        u = np.linalg.solve(cov, x.T - mean)

        def col_dot_prod(x, y):
            return np.sum(x * y, axis=0)

        logpdf = (
            -0.5 * self.dimension * np.log(2 * np.pi)
            - np.sum(np.log(np.abs(np.diag(cholesky))))
            - 0.5 * col_dot_prod(x.T - mean, u)
        )
        return logpdf.flatten()

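    # Consistency sketch (not part of the original source): the result can be
    # cross-checked against scipy.stats, assuming the example values above.
    #
    #     from scipy.stats import multivariate_normal
    #     x = dist.draw(params, n_draws=5)
    #     np.allclose(dist.logpdf(params, x),
    #                 multivariate_normal.logpdf(x, mean=mean, cov=cov))  # True
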
    def pdf(self, variational_parameters, x):
        """Pdf evaluated at given samples *x*.

        First computes the logpdf, which is numerically more stable for exponential-family
        distributions.

        Args:
            variational_parameters (np.ndarray): Variational parameters
            x (np.ndarray): Row-wise samples

        Returns:
            pdf (np.ndarray): Row vector of the pdfs
        """
        pdf = np.exp(self.logpdf(variational_parameters, x))
        return pdf

    def grad_params_logpdf(self, variational_parameters, x):
        """Logpdf gradient w.r.t. the variational parameters.

        Evaluated at samples *x*. Also known as the score function.

        Args:
            variational_parameters (np.ndarray): Variational parameters
            x (np.ndarray): Row-wise samples

        Returns:
            score (np.ndarray): Column-wise scores
        """
        mean, cov, cholesky = self.reconstruct_distribution_parameters(
            variational_parameters, return_cholesky=True
        )
        x = np.atleast_2d(x)
        # Helper variable
        q = np.linalg.solve(cov, x.T - mean)
        dlogpdf_dmu = q.copy()
        diag_indx = np.cumsum(np.arange(1, self.dimension + 1)) - 1
        n_params_chol = (self.dimension * (self.dimension + 1)) // 2
        dlogpdf_dsigma = np.zeros((n_params_chol, 1))
        # Term due to determinant
        dlogpdf_dsigma[diag_indx] = -1.0 / (np.diag(cholesky).reshape(-1, 1))
        dlogpdf_dsigma = np.tile(dlogpdf_dsigma, (1, len(x)))
        # Term due to normalization
        b = np.matmul(cholesky.T, q)
        indx = 0
        for r in range(self.dimension):
            for s in range(r + 1):
                dlogpdf_dsigma[indx, :] += q[r, :] * b[s, :]
                indx += 1
        score = np.vstack((dlogpdf_dmu, dlogpdf_dsigma))
        return score

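    # Verification sketch (continuing the hypothetical example, not part of the
    # original source): the score can be checked against central finite
    # differences of the logpdf.
    #
    #     eps = 1e-6
    #     x = np.array([[0.3, -0.2]])
    #     score_fd = np.array(
    #         [
    #             (dist.logpdf(params + eps * e_i, x) - dist.logpdf(params - eps * e_i, x))
    #             / (2 * eps)
    #             for e_i in np.eye(dist.n_parameters)
    #         ]
    #     )
    #     np.allclose(dist.grad_params_logpdf(params, x), score_fd, atol=1e-5)  # True
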
    def total_grad_params_logpdf(self, variational_parameters, standard_normal_sample_batch):
        """Total logpdf reparameterization gradient.

        Total logpdf reparameterization gradient w.r.t. the variational parameters.

        Args:
            variational_parameters (np.ndarray): Variational parameters
            standard_normal_sample_batch (np.ndarray): Standard normal distributed sample batch

        Returns:
            total_grad (np.ndarray): Total logpdf reparameterization gradient
        """
        idx = np.tril_indices(self.dimension, k=0, m=self.dimension)
        cholesky_diagonal_idx = np.where(np.equal(*idx))[0] + self.dimension
        total_grad = np.zeros(
            (standard_normal_sample_batch.shape[0], variational_parameters.size)
        )
        total_grad[:, cholesky_diagonal_idx] = -1 / variational_parameters[cholesky_diagonal_idx]
        return total_grad

    def grad_sample_logpdf(self, variational_parameters, sample_batch):
        """Compute the gradient of the logpdf w.r.t. the samples *x*.

        Args:
            variational_parameters (np.ndarray): Variational parameters
            sample_batch (np.ndarray): Row-wise samples

        Returns:
            gradients_batch (np.ndarray): Gradients of the logpdf w.r.t. the sample *x*. The
                                          first dimension of the array corresponds to the
                                          different samples, the second dimension to the
                                          different dimensions within one sample.
        """
        # pylint: disable-next=unbalanced-tuple-unpacking
        mean, cov = self.reconstruct_distribution_parameters(variational_parameters)
        gradient_lst = []
        for sample in sample_batch:
            # Solve instead of explicitly inverting the covariance for numerical stability
            gradient_lst.append(
                np.linalg.solve(cov, -(sample.reshape(-1, 1) - mean)).reshape(-1, 1)
            )
        gradients_batch = np.array(gradient_lst)
        return gradients_batch.reshape(sample_batch.shape)

    def fisher_information_matrix(self, variational_parameters):
        """Compute the Fisher information matrix analytically.

        Args:
            variational_parameters (np.ndarray): Variational parameters

        Returns:
            FIM (np.ndarray): Matrix (num parameters x num parameters)
        """
        _, cov, cholesky = self.reconstruct_distribution_parameters(
            variational_parameters, return_cholesky=True
        )

        def fim_blocks(dimension):
            """Compute the blocks of the FIM."""
            mu_block = np.linalg.inv(cov + 1e-8 * np.eye(len(cov)))
            n_params_chol = (dimension * (dimension + 1)) // 2
            sigma_block = np.zeros((n_params_chol, n_params_chol))
            matrix_list = []
            # Improvements of this implementation are welcome!
            for r in range(dimension):
                for s in range(r + 1):
                    matrix_q = np.zeros(cholesky.shape)
                    matrix_q[r, s] = 1
                    matrix_q = cholesky @ matrix_q.T
                    matrix_q = np.linalg.solve(cov, matrix_q + matrix_q.T)
                    matrix_list.append(matrix_q)
            for p in range(n_params_chol):
                for q in range(p + 1):
                    val = 0.5 * np.trace(matrix_list[p] @ matrix_list[q])
                    sigma_block[p, q] = val
                    sigma_block[q, p] = val
            return mu_block, sigma_block

        # Using jit is useful in higher-dimensional cases but introduces a computational
        # overhead in lower-dimensional cases. Tests showed that the break-even point is
        # reached at around dimension 35.
        if self.dimension < 35:
            mu_block, sigma_block = fim_blocks(self.dimension)
        else:
            mu_block, sigma_block = njit(fim_blocks)(self.dimension)

        return scipy.linalg.block_diag(mu_block, sigma_block)

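    # Monte-Carlo sketch (not part of the original source): by definition,
    # FIM = E[score score^T], so the analytic result can be cross-checked
    # against an empirical estimate built from grad_params_logpdf.
    #
    #     samples = dist.draw(params, n_draws=100000)
    #     scores = dist.grad_params_logpdf(params, samples)  # (n_parameters, n_draws)
    #     fim_mc = scores @ scores.T / samples.shape[0]
    #     # fim_mc approx. dist.fisher_information_matrix(params)
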
    def export_dict(self, variational_parameters):
        """Create a dict of the distribution based on the given parameters.

        Args:
            variational_parameters (np.ndarray): Variational parameters

        Returns:
            export_dict (dictionary): Dict containing distribution information
        """
        # pylint: disable-next=unbalanced-tuple-unpacking
        mean, cov = self.reconstruct_distribution_parameters(variational_parameters)
        export_dict = {
            "type": "fullrank_Normal",
            "mean": mean,
            "covariance": cov,
            "variational_parameters": variational_parameters,
        }
        return export_dict

    def conduct_reparameterization(self, variational_parameters, n_samples):
        """Conduct a reparameterization.

        Args:
            variational_parameters (np.ndarray): Array with variational parameters
            n_samples (int): Number of samples for the current batch

        Returns:
            samples_mat (np.ndarray): Array of actual samples from the variational distribution
            standard_normal_sample_batch (np.ndarray): Standard normal distributed sample batch
        """
        standard_normal_sample_batch = np.random.normal(0, 1, size=(n_samples, self.dimension))
        mean, _, cholesky = self.reconstruct_distribution_parameters(
            variational_parameters, return_cholesky=True
        )
        samples_mat = mean + np.dot(cholesky, standard_normal_sample_batch.T)
        return samples_mat.T, standard_normal_sample_batch

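    # Reparameterization sketch (continuing the hypothetical example, not part
    # of the original source): each returned sample row is an affine transform
    # of the corresponding noise row, samples[i] = mean + L @ noise[i], which
    # makes the samples differentiable w.r.t. the variational parameters.
    #
    #     samples, noise = dist.conduct_reparameterization(params, n_samples=10)
    #     m, _, chol = dist.reconstruct_distribution_parameters(params, return_cholesky=True)
    #     np.allclose(samples, (m + chol @ noise.T).T)  # True
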
    def grad_params_reparameterization(
        self, variational_parameters, standard_normal_sample_batch, upstream_gradient
    ):
        r"""Calculate the gradient of the reparameterization.

        Args:
            variational_parameters (np.ndarray): Variational parameters
            standard_normal_sample_batch (np.ndarray): Standard normal distributed sample batch
            upstream_gradient (np.ndarray): Upstream gradient

        Returns:
            gradient (np.ndarray): Gradient of the upstream function w.r.t. the variational
                                   parameters.

        **Note:** We assume that *grad_reconstruct_params* is a row vector containing the
        partial derivatives of the reconstruction mapping of the actual distribution
        parameters w.r.t. the variational parameters. The variable *jacobi_parameters* is the
        (n_parameters :math:`\times` dim_sample) Jacobi matrix of the reparameterization
        w.r.t. the distribution parameters, with the derivatives w.r.t. the different
        distribution parameters in the rows and the different output dimensions of the
        sample in the columns.
        """
        # pylint: disable=unused-argument
        grad_reconstruct_params = self._grad_reconstruct_distribution_parameters()
        indices_1, indices_2 = np.tril_indices(self.dimension)
        gradient = (
            np.hstack(
                (
                    upstream_gradient,
                    upstream_gradient[:, indices_1] * standard_normal_sample_batch[:, indices_2],
                )
            )
            * grad_reconstruct_params
        )
        return gradient

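
# -----------------------------------------------------------------------------
# Illustrative end-to-end usage sketch (not part of the original module): a
# minimal 2-D demo tying the methods above together; the mean and covariance
# values are hypothetical.
# -----------------------------------------------------------------------------
if __name__ == "__main__":
    demo_mean = np.array([1.0, 2.0])
    demo_cov = np.array([[2.0, 0.5], [0.5, 1.0]])

    demo_dist = FullRankNormal(dimension=2)
    demo_params = demo_dist.construct_variational_parameters(demo_mean, demo_cov)

    # Draw samples and evaluate their log-density
    demo_samples = demo_dist.draw(demo_params, n_draws=1000)
    demo_logpdfs = demo_dist.logpdf(demo_params, demo_samples)

    # Reparameterization trick: samples expressed through standard normal noise
    _, demo_noise = demo_dist.conduct_reparameterization(demo_params, n_samples=1000)

    print("empirical mean:", demo_samples.mean(axis=0))  # approx. [1, 2]
    print("empirical covariance:\n", np.cov(demo_samples.T))  # approx. demo_cov
    print("average logpdf:", demo_logpdfs.mean())
    print("noise batch shape:", demo_noise.shape)  # (1000, 2)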