import pandas as pd
[docs]
class SurrogateCollector:
"""
Collector designed for gathering and organizing data from surrogate-based optimization results.
This class provides methods to retrieve various sets of data including model outputs,
timing metrics, and machine learning performance scores, all structured into pandas DataFrames.
Attributes:
sim (Simulation): An instance of the Simulation class that provides methods
and properties necessary for the simulation environment.
"""
def __init__(self, sim):
self.model = sim
[docs]
def get_model_df(self):
"""
Constructs a DataFrame containing all relevant outputs from the simulation model.
The DataFrame includes objective values, raw outputs, standardized outputs,
and the inputs to the model, providing a comprehensive view of the model's performance.
Returns:
pd.DataFrame: A DataFrame combining input data with simulation outputs including
objective values, raw and standardized outputs.
"""
self.y = pd.DataFrame.from_records(self.model.y)
self.objective = self.y.sum(axis=1).rename('objective')
self.y_raw = pd.DataFrame.from_records(self.model.y_raw).add_suffix('_raw')
self.y_std = pd.DataFrame.from_records(self.model.y_std).add_suffix('_std')
self.model_df = pd.concat([self.model.X_df, self.objective, self.y, self.y_std, self.y_raw], axis=1)
return self.model_df
[docs]
def get_timing(self):
"""
Retrieves and formats the timing information for the simulation.
The method extracts timing data for the simulation setup, model building,
and acquisition phases, and formats it into a readable DataFrame.
Returns:
pd.DataFrame: A DataFrame containing detailed timing information for different
phases of the simulation, suffixed with ' [s]' to denote seconds.
"""
timing = {
'Simulation': self.model.sim_time,
'Build': self.model.build_time,
'Acquisition': [0] + self.model.acquisition_time,
'Total': self.model.optimize_time
}
self.timing = pd.DataFrame.from_dict(timing, orient='index').T.add_suffix(' [s]')
return self.timing
[docs]
def get_machine_learning_scores(self):
"""
Retrieves the performance scores of the machine learning models used in the simulation.
The scores are extracted from the simulation model and formatted into a DataFrame
to provide a clear view of each model's performance.
Returns:
pd.DataFrame: A DataFrame containing performance scores for each machine learning model.
"""
self.ml_model_scores = pd.DataFrame.from_dict(self.model.model_scores, orient='index').T
return self.ml_model_scores
[docs]
def get_total(self):
"""
Compiles all collected data into a single comprehensive DataFrame.
This method aggregates model outputs, timing data, and machine learning scores into one DataFrame,
facilitating an integrated view of all results and metrics from the simulation.
Returns:
pd.DataFrame: A merged DataFrame containing all collected data, allowing for easy analysis and visualization.
"""
self.get_model_df()
self.get_timing()
self.get_machine_learning_scores()
self.total = self.model_df.merge(self.timing, left_on='Iteration', right_index=True)
self.total = self.total.merge(self.ml_model_scores, left_on='Iteration', right_index=True)
return self.total
[docs]
def get_parameters(variables):
"""
Extracts and formats parameters from a dictionary for use in the Ax-platform optimization tool.
Parameters
----------
variables : dict
A dictionary where keys correspond to parameter types (e.g., 'range', 'ordinal', 'choice')
and values provide the definitions of these parameters.
Returns
-------
list
A list of parameter definitions formatted for use in optimization routines,
with each parameter represented as a dictionary detailing its name,
type, and constraints or choices.
"""
parameters = []
for k in variables:
for key,value in variables[k].items():
typ = 'choice' if k == 'ordinal' else k
if typ != 'choice':
parameters.append(
{
"name": str(key),
"type": typ,
"bounds": value[0],
})
else:
parameters.append(
{
"name": str(key),
"type": typ,
"values": value,
})
return parameters