Source code for queens.iterators.control_variates

#
# SPDX-License-Identifier: LGPL-3.0-or-later
# Copyright (c) 2024-2025, QUEENS contributors.
#
# This file is part of QUEENS.
#
# QUEENS is free software: you can redistribute it and/or modify it under the terms of the GNU
# Lesser General Public License as published by the Free Software Foundation, either version 3 of
# the License, or (at your option) any later version. QUEENS is distributed in the hope that it will
# be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. You
# should have received a copy of the GNU Lesser General Public License along with QUEENS. If not,
# see <https://www.gnu.org/licenses/>.
#
"""Monte Carlo Control Variates Iterator."""

import logging

import numpy as np

from queens.iterators._iterator import Iterator
from queens.utils.logger_settings import log_init_args
from queens.utils.process_outputs import write_results

_logger = logging.getLogger(__name__)



[docs]
class ControlVariates(Iterator):
    r"""Monte Carlo control variates iterator.

    The control variates method in the context of Monte Carlo is used to quantify uncertainty in a
    model when input parameters are uncertain and only expressed as probability distributions. The
    Monte Carlo control variates method uses so-called low-fidelity models as control variates to
    make the quantification more precise. In the context of Monte Carlo, the control variate method
    is sometimes also called control variable method.

    The estimator for the Monte Carlo control variates method with a single control variate is
    given by
    :math:`\hat{\mu}_{f}= \underbrace{\frac{1}{N} \sum\limits_{i=1}^{N} \Big [ f(x^{(i)}) - \alpha
    \Big(g(x^{(i)}) - \hat\mu_{g} \Big) \Big]}_\textrm{cross-model estimator}`
    where :math:`f` represents the model, :math:`g` the control variate, and :math:`\hat\mu_{g}`
    the expectation of the control variate. :math:`N` represents the number of samples on the
    cross-model estimator and :math:`x^{(i)}` are random parameter samples.

    In case the mean of the control variate is known, :math:`\hat\mu_{g}` can be passed to the
    iterator as expectation_cv. Otherwise, :math:`\hat\mu_{g}` is estimated with the Monte
    Carlo method.

    The implementation is based on chapter 9.3 in [1] and uses one control variate.

    References:
        [1] D. P. Kroese, Z. I. Botev, and T. Taimre. "Handbook of Monte Carlo Methods". Wiley,
            2011.

    Attributes:
        model (Model): Main model. The uncertainties are quantified for this model.
        control_variate (Model): Control variate model.
        seed (int): Seed for random samples.
        num_samples (int): Number of samples on the cross-model estimator.
        expectation_cv (float): Expectation of the control variate. If the expectation is None,
                                it will be estimated via MC sampling.
        output (dict): Output dict with the following entries:

                       * ``mean`` (float): Cross-model estimator.
                       * ``std`` (float): Estimated standard deviation of the cross-model estimator.
                       * ``num_samples_cv`` (int): Number of samples to estimate the control variate
                                                   mean.
                       * ``mean_cv`` (float): Mean of control variate.
                       * ``std_cv_mean_estimator`` (float): Standard deviation of control variate
                                                            mean estimation.
                       * ``cv_influence_coeff`` (float): Method specific parameter that determines
                                                         the influence of the control variate.
                       * ``sample_ratio`` (float): Ratio of number of samples on control variate to
                                                   number of samples on main model. Is only part of
                                                   output if use_optimal_num_samples is True.

        num_samples_cv (int): Number of samples to use for computing the expectation
                              of the control variate if this expectation is unknown.
        samples (np.array): Samples for the control variates estimator.
        use_optimal_num_samples (bool): Determines wether the iterator calculates and uses the
                                        optimal number of samples to estimate the control variate
                                        mean such that the variance of the control variates
                                        estimator is minimized.
        cost_model (float): Cost of evaluating the model.
        cost_cv (float): Cost of evaluating the control variate.
        variance_cv_mean_estimator (float): Variance of the control variate mean estimator.
    """

    @log_init_args
    def __init__(
        self,
        model,
        control_variate,
        parameters,
        global_settings,
        seed,
        num_samples,
        expectation_cv=None,
        num_samples_cv=None,
        use_optimal_num_samples=False,
        cost_model=None,
        cost_cv=None,
    ):
        """Initialize the control variates iterator.

        Args:
            model (Model): Main model. The uncertainties are quantified for this model.
            control_variate (Model): Control variate model.
            parameters (Parameters): Parameters to use for evaluation.
            global_settings (GlobalSettings): Global settings to use for evaluation.
            seed (int): Seed for random samples.
            num_samples (int): Number of samples on the cross-model estimator.
            expectation_cv (float, opt): Expectation of the control variate. If the expectation is
                                         None, it will be estimated via MC sampling.
            num_samples_cv (int, opt): Number of samples to use for computing the expectation
                                       of the control variate if this expectation is unknown.
            use_optimal_num_samples (bool, opt): Determines wether the iterator calculates and uses
                                                 the optimal number of samples to estimate the
                                                 control variate mean such that the variance of the
                                                 control variates estimator is minimized.
            cost_model (float, opt): Cost of evaluating the model.
            cost_cv (float, opt): Cost of evaluating the control variate.

        Raises:
            ValueError: If model is None.
                        If control_variable is None.
                        If neither the expectation nor the number of samples of the control
                        variate are given and the optimal number of samples is not used.
                        If the optimal number of samples are used and the costs of the models are
                        not provided.
        """
        # Initialize parent iterator with main model.
        super().__init__(model, parameters, global_settings)

        if model is None or control_variate is None:
            raise ValueError("A model and a control variate have to be given!")

        if expectation_cv is None and num_samples_cv is None and not use_optimal_num_samples:
            raise ValueError(
                "expectation_cv or num_samples_cv has to be given when not using "
                "the optimal number of samples."
            )

        if use_optimal_num_samples and (cost_model is None or cost_cv is None):
            raise ValueError(
                "Model and control variate costs have to be given if you want to use the optimal "
                "number of samples"
            )

        self.control_variate = control_variate
        self.seed = seed
        self.num_samples = num_samples
        self.samples = None
        self.output = None
        self.expectation_cv = expectation_cv
        self.num_samples_cv = num_samples_cv
        self.use_optimal_num_samples = use_optimal_num_samples
        self.cost_model = cost_model
        self.cost_cv = cost_cv
        self.variance_cv_mean_estimator = 0


[docs]
    def pre_run(self):
        """Draw samples for the core run."""
        np.random.seed(self.seed)

        self.samples = self.parameters.draw_samples(self.num_samples)



[docs]
    def core_run(self):
        """Core run of iterator.

        Computes the cross-model estimator and its standard deviation.
        """
        # Evaluate models for the drawn samples.
        output_model = np.concatenate(self.model.evaluate(self.samples)["result"])
        output_cv = np.concatenate(self.control_variate.evaluate(self.samples)["result"])

        # Compute the covariance matrix between the two models.
        models_cov = np.cov(output_model, output_cv)

        cov = models_cov[0, 1]  # Covariance between the main model and the control variate
        var_model = models_cov[0, 0]  # Variance of main model
        var_cv = models_cov[1, 1]  # Variance of control variate

        # Compute expectation of control variate if it is not known.
        if self.expectation_cv is None:

            # If using the optimal number of samples, calculate the best ratio of
            # num_samples to num_samples_cv.
            if self.use_optimal_num_samples:
                # Correlation coefficient between the main model and the control variate.
                correlation_coefficient = cov / np.sqrt(var_model * var_cv)
                if correlation_coefficient >= 0.99999:
                    _logger.warning(
                        "The correlation between input models is perfect, do not use "
                        "control variates!"
                    )
                # Calculate optimal factor relating number of samples on the cross-model
                # estimator and the control variate estimator.
                sample_ratio = (
                    np.sqrt(
                        correlation_coefficient**2
                        / (1 - correlation_coefficient**2)
                        * (self.cost_model / self.cost_cv)
                    )
                    - 1
                )

                if sample_ratio <= 0:
                    raise ValueError(
                        "Optimal number of samples not possible for the chosen input models."
                    )
                self.num_samples_cv = int(sample_ratio * self.num_samples)

            # Draw samples and estimate the mean of the control variate via naive monte carlo.
            samples = self.parameters.draw_samples(self.num_samples_cv)
            results = self.control_variate.evaluate(samples)["result"]
            self.expectation_cv = results.mean()
            self.variance_cv_mean_estimator = results.var() / self.num_samples_cv

        # Calculate coefficient that determines how much the control variate influences
        # the control variate mean estimator.
        cv_influence_coeff = cov / (var_cv + self.num_samples * self.variance_cv_mean_estimator)

        # Calculate estimated mean of mean function with control variates estimator.
        mean = (
            output_model - cv_influence_coeff * output_cv
        ).mean() + cv_influence_coeff * self.expectation_cv

        # Calculate the variance of control variates estimator.
        var_estimator = 1 / self.num_samples * (var_model - cv_influence_coeff * cov)

        self.output = {
            "mean": mean,
            "std": var_estimator**0.5,
            "num_samples_cv": self.num_samples_cv,
            "mean_cv": self.expectation_cv,
            "std_cv_mean_estimator": self.variance_cv_mean_estimator**0.5,
            "cv_influence_coeff": cv_influence_coeff,
        }

        if self.use_optimal_num_samples:
            self.output["sample_ratio"] = sample_ratio



[docs]
    def post_run(self):
        """Write results to result file."""
        write_results(
            processed_results=self.output, file_path=self.global_settings.result_file(".pickle")
        )