#
# SPDX-License-Identifier: LGPL-3.0-or-later
# Copyright (c) 2024-2025, QUEENS contributors.
#
# This file is part of QUEENS.
#
# QUEENS is free software: you can redistribute it and/or modify it under the terms of the GNU
# Lesser General Public License as published by the Free Software Foundation, either version 3 of
# the License, or (at your option) any later version. QUEENS is distributed in the hope that it will
# be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. You
# should have received a copy of the GNU Lesser General Public License along with QUEENS. If not,
# see <https://www.gnu.org/licenses/>.
#
"""Monte Carlo Control Variates Iterator."""
import logging
import numpy as np
from queens.iterators._iterator import Iterator
from queens.utils.logger_settings import log_init_args
from queens.utils.process_outputs import write_results
_logger = logging.getLogger(__name__)
class ControlVariates(Iterator):
    r"""Monte Carlo control variates iterator.

    The control variates method in the context of Monte Carlo is used to quantify uncertainty in
    a model when its input parameters are uncertain and only given as probability distributions.
    The Monte Carlo control variates method uses so-called low-fidelity models as control
    variates to make the quantification more precise. In the context of Monte Carlo, the control
    variate method is sometimes also called the control variable method.

    The estimator of the Monte Carlo control variates method with a single control variate is
    given by

    :math:`\hat{\mu}_{f}= \underbrace{\frac{1}{N} \sum\limits_{i=1}^{N} \Big[ f(x^{(i)}) - \alpha
    \Big(g(x^{(i)}) - \hat\mu_{g} \Big) \Big]}_\textrm{cross-model estimator}`

    where :math:`f` represents the model, :math:`g` the control variate, and :math:`\hat\mu_{g}`
    the expectation of the control variate. :math:`N` is the number of samples on the
    cross-model estimator and :math:`x^{(i)}` are random parameter samples.

    In case the mean of the control variate is known, :math:`\hat\mu_{g}` can be passed to the
    iterator as ``expectation_cv``. Otherwise, :math:`\hat\mu_{g}` is estimated with the Monte
    Carlo method.

    The implementation is based on chapter 9.3 of [1] and uses one control variate.

    References:
        [1] D. P. Kroese, Z. I. Botev, and T. Taimre. "Handbook of Monte Carlo Methods".
            Wiley, 2011.
    Attributes:
        model (Model): Main model. The uncertainties are quantified for this model.
        control_variate (Model): Control variate model.
        seed (int): Seed for random samples.
        num_samples (int): Number of samples on the cross-model estimator.
        expectation_cv (float): Expectation of the control variate. If the expectation is None,
                                it will be estimated via MC sampling.
        output (dict): Output dict with the following entries:

            * ``mean`` (float): Cross-model estimator.
            * ``std`` (float): Estimated standard deviation of the cross-model estimator.
            * ``num_samples_cv`` (int): Number of samples used to estimate the control variate
              mean.
            * ``mean_cv`` (float): Mean of the control variate.
            * ``std_cv_mean_estimator`` (float): Standard deviation of the control variate
              mean estimator.
            * ``cv_influence_coeff`` (float): Method-specific coefficient that determines
              the influence of the control variate.
            * ``sample_ratio`` (float): Ratio of the number of samples on the control variate
              to the number of samples on the main model. Only part of the output if
              ``use_optimal_num_samples`` is True.
        num_samples_cv (int): Number of samples used to compute the expectation of the control
                              variate if this expectation is unknown.
        samples (np.array): Samples for the control variates estimator.
        use_optimal_num_samples (bool): Determines whether the iterator calculates and uses the
                                        optimal number of samples to estimate the control
                                        variate mean such that the variance of the control
                                        variates estimator is minimized.
        cost_model (float): Cost of evaluating the model.
        cost_cv (float): Cost of evaluating the control variate.
        variance_cv_mean_estimator (float): Variance of the control variate mean estimator.
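
    Example:
        The following minimal NumPy sketch is purely illustrative and independent of the QUEENS
        interface; ``f`` and ``g`` are toy stand-ins for the main model and the control variate,
        the control variate mean is known exactly, and ``alpha`` is the classical
        variance-minimizing coefficient::

            import numpy as np

            rng = np.random.default_rng(42)
            x = rng.normal(size=1000)

            f = x**2 + 0.1 * rng.normal(size=1000)  # main (high-fidelity) model output
            g = x**2                                 # control variate output
            mu_g = 1.0                               # known E[g] for x ~ N(0, 1)

            alpha = np.cov(f, g)[0, 1] / np.var(g, ddof=1)
            cv_estimate = np.mean(f - alpha * (g - mu_g))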
"""

    @log_init_args
    def __init__(
        self,
        model,
        control_variate,
        parameters,
        global_settings,
        seed,
        num_samples,
        expectation_cv=None,
        num_samples_cv=None,
        use_optimal_num_samples=False,
        cost_model=None,
        cost_cv=None,
    ):
"""Initialize the control variates iterator.
Args:
model (Model): Main model. The uncertainties are quantified for this model.
control_variate (Model): Control variate model.
parameters (Parameters): Parameters to use for evaluation.
global_settings (GlobalSettings): Global settings to use for evaluation.
seed (int): Seed for random samples.
num_samples (int): Number of samples on the cross-model estimator.
expectation_cv (float, opt): Expectation of the control variate. If the expectation is
None, it will be estimated via MC sampling.
num_samples_cv (int, opt): Number of samples to use for computing the expectation
of the control variate if this expectation is unknown.
use_optimal_num_samples (bool, opt): Determines wether the iterator calculates and uses
the optimal number of samples to estimate the
control variate mean such that the variance of the
control variates estimator is minimized.
cost_model (float, opt): Cost of evaluating the model.
cost_cv (float, opt): Cost of evaluating the control variate.
Raises:
ValueError: If model is None.
If control_variable is None.
If neither the expectation nor the number of samples of the control
variate are given and the optimal number of samples is not used.
If the optimal number of samples are used and the costs of the models are
not provided.
"""
        # Initialize parent iterator with main model.
        super().__init__(model, parameters, global_settings)

        if model is None or control_variate is None:
            raise ValueError("A model and a control variate have to be given!")
        if expectation_cv is None and num_samples_cv is None and not use_optimal_num_samples:
            raise ValueError(
                "expectation_cv or num_samples_cv has to be given when not using "
                "the optimal number of samples."
            )
        if use_optimal_num_samples and (cost_model is None or cost_cv is None):
            raise ValueError(
                "Model and control variate costs have to be given if you want to use the optimal "
                "number of samples"
            )

        self.control_variate = control_variate
        self.seed = seed
        self.num_samples = num_samples
        self.samples = None
        self.output = None
        self.expectation_cv = expectation_cv
        self.num_samples_cv = num_samples_cv
        self.use_optimal_num_samples = use_optimal_num_samples
        self.cost_model = cost_model
        self.cost_cv = cost_cv
        self.variance_cv_mean_estimator = 0

    def pre_run(self):
        """Draw samples for the core run."""
        np.random.seed(self.seed)
        self.samples = self.parameters.draw_samples(self.num_samples)

    def core_run(self):
        """Core run of the iterator.

        Computes the cross-model estimator and its standard deviation.
        """
        # Evaluate both models on the drawn samples.
        output_model = np.concatenate(self.model.evaluate(self.samples)["result"])
        output_cv = np.concatenate(self.control_variate.evaluate(self.samples)["result"])

        # Compute the covariance matrix between the two model outputs.
        models_cov = np.cov(output_model, output_cv)
        cov = models_cov[0, 1]  # Covariance between the main model and the control variate
        var_model = models_cov[0, 0]  # Variance of the main model
        var_cv = models_cov[1, 1]  # Variance of the control variate
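        # Note: np.cov returns the 2x2 sample covariance matrix of the stacked outputs and uses
        # the unbiased estimate (ddof=1) by default, so cov, var_model, and var_cv are sample
        # estimates.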

        # Compute the expectation of the control variate if it is not known.
        if self.expectation_cv is None:
            # If using the optimal number of samples, calculate the best ratio of
            # num_samples_cv to num_samples.
            if self.use_optimal_num_samples:
                # Correlation coefficient between the main model and the control variate.
                correlation_coefficient = cov / np.sqrt(var_model * var_cv)

                if correlation_coefficient >= 0.99999:
                    _logger.warning(
                        "The correlation between input models is perfect, do not use "
                        "control variates!"
                    )

                # Calculate the optimal factor relating the number of samples on the cross-model
                # estimator to the number of samples on the control variate estimator.
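                # Closed-form expression for this ratio (cf. [1], chapter 9.3):
                #   sample_ratio = sqrt(rho^2 / (1 - rho^2) * cost_model / cost_cv) - 1,
                # where rho is the correlation coefficient computed above. The ratio diverges as
                # rho approaches 1, which is why a (near-)perfect correlation is flagged above.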
                sample_ratio = (
                    np.sqrt(
                        correlation_coefficient**2
                        / (1 - correlation_coefficient**2)
                        * (self.cost_model / self.cost_cv)
                    )
                    - 1
                )

                if sample_ratio <= 0:
                    raise ValueError(
                        "Optimal number of samples not possible for the chosen input models."
                    )

                self.num_samples_cv = int(sample_ratio * self.num_samples)

            # Draw samples and estimate the mean of the control variate via naive Monte Carlo.
            samples = self.parameters.draw_samples(self.num_samples_cv)
            results = self.control_variate.evaluate(samples)["result"]
            self.expectation_cv = results.mean()
            self.variance_cv_mean_estimator = results.var() / self.num_samples_cv

        # Compute the coefficient that determines how much the control variate influences
        # the cross-model estimator.
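        # This is the variance-minimizing choice
        #   cv_influence_coeff = Cov(f, g) / (Var(g) + num_samples * variance_cv_mean_estimator),
        # which reduces to the classical Cov(f, g) / Var(g) when the control variate mean is
        # known exactly (variance_cv_mean_estimator = 0).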
        cv_influence_coeff = cov / (var_cv + self.num_samples * self.variance_cv_mean_estimator)

        # Compute the control variates estimate of the model mean (cross-model estimator).
        mean = (
            output_model - cv_influence_coeff * output_cv
        ).mean() + cv_influence_coeff * self.expectation_cv

        # Calculate the variance of the control variates estimator.
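        # With the coefficient above and a known control variate mean, this equals
        # var_model * (1 - rho^2) / num_samples, i.e. the variance reduction over plain Monte
        # Carlo grows with the squared correlation between model and control variate.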
        var_estimator = 1 / self.num_samples * (var_model - cv_influence_coeff * cov)

        self.output = {
            "mean": mean,
            "std": var_estimator**0.5,
            "num_samples_cv": self.num_samples_cv,
            "mean_cv": self.expectation_cv,
            "std_cv_mean_estimator": self.variance_cv_mean_estimator**0.5,
            "cv_influence_coeff": cv_influence_coeff,
        }

        if self.use_optimal_num_samples:
            self.output["sample_ratio"] = sample_ratio

    def post_run(self):
        """Write results to the result file."""
        write_results(
            processed_results=self.output, file_path=self.global_settings.result_file(".pickle")
        )