WARNING:amltk.optimization.optimizers.neps:There are 1 configs that were sampled, but have no worker assigned. Sometimes this is due to a delay in the filesystem communication, but most likely some configs crashed during their execution or a jobtime-limit was reached.
class NEPSOptimizer(Optimizer[NEPSTrialInfo]):
    """An optimizer that uses NePS to optimize a search space.

    Configurations are sampled through `metahyper` and the results, along
    with some metadata, are serialized into a per-config directory located
    inside `working_dir`.
    """

    def __init__(
        self,
        *,
        space: SearchSpace,
        optimizer: BaseOptimizer,
        working_dir: Path,
        bucket: Bucket | None = None,
        ignore_errors: bool = True,
        loss_value_on_error: float | None = None,
        cost_value_on_error: float | None = None,
    ) -> None:
        """Initialize the optimizer.

        Creates `working_dir` and its `results` sub-directory if they do
        not exist yet.

        Args:
            space: The space to use.
            optimizer: The optimizer to use.
            working_dir: The directory to use for the optimization.
            bucket: The bucket to give to trials generated from this optimizer.
            ignore_errors: Whether the optimizers should ignore errors from trials.
            loss_value_on_error: The value to use for the loss if the trial fails.
            cost_value_on_error: The value to use for the cost if the trial fails.
        """
        super().__init__(bucket=bucket)
        self.space = space
        self.optimizer = optimizer
        self.working_dir = working_dir
        self.ignore_errors = ignore_errors
        self.loss_value_on_error = loss_value_on_error
        self.cost_value_on_error = cost_value_on_error

        # Locations and serializer shared by `ask()` and `tell()`.
        self.optimizer_state_file = self.working_dir / "optimizer_state.yaml"
        self.base_result_directory = self.working_dir / "results"
        self.serializer = metahyper.utils.YamlSerializer(self.optimizer.load_config)

        self.working_dir.mkdir(parents=True, exist_ok=True)
        self.base_result_directory.mkdir(parents=True, exist_ok=True)

    @classmethod
    def create(  # noqa: PLR0913
        cls,
        *,
        space: (
            SearchSpace
            | ConfigurationSpace
            | Mapping[str, ConfigurationSpace | Parameter]
        ),
        bucket: Bucket | None = None,
        searcher: str | BaseOptimizer = "default",
        working_dir: str | Path = "neps",
        overwrite: bool = True,
        loss_value_on_error: float | None = None,
        cost_value_on_error: float | None = None,
        max_cost_total: float | None = None,
        ignore_errors: bool = True,
        searcher_kwargs: Mapping[str, Any] | None = None,
    ) -> Self:
        """Create a new NEPS optimizer.

        Args:
            space: The space to use.
            bucket: The bucket to give to trials generated by this optimizer.
            searcher: The searcher to use.
            working_dir: The directory to use for the optimization.
            overwrite: Whether to overwrite the working directory if it exists.
                If `True`, an existing `working_dir` is removed recursively.
            loss_value_on_error: The value to use for the loss if the trial fails.
            cost_value_on_error: The value to use for the cost if the trial fails.
            max_cost_total: The maximum cost to use for the optimization.

                !!! warning

                    This only affects the optimization if the searcher utilizes
                    the budget for its actual suggestion of the next config. If
                    the searcher does not use the budget, this parameter has no
                    effect.

                    The user is still expected to stop `ask()`'ing for configs
                    when they have reached some budget.

            ignore_errors: Whether the optimizers should ignore errors from trials
                or whether they should be taken into account. Please set
                `loss_value_on_error` and/or `cost_value_on_error` if you set
                this to `False`.
            searcher_kwargs: Additional kwargs to pass to the searcher.

        Returns:
            The newly constructed optimizer.
        """
        space = _to_neps_space(space)
        searcher = _to_neps_searcher(
            space=space,
            searcher=searcher,
            loss_value_on_error=loss_value_on_error,
            cost_value_on_error=cost_value_on_error,
            max_cost_total=max_cost_total,
            ignore_errors=ignore_errors,
            searcher_kwargs=searcher_kwargs,
        )
        working_dir = Path(working_dir)
        if working_dir.exists() and overwrite:
            logger.info(f"Removing existing working directory {working_dir}")
            shutil.rmtree(working_dir)

        return cls(
            space=space,
            bucket=bucket,
            optimizer=searcher,
            working_dir=working_dir,
            # Forward `ignore_errors` so that `tell()` sees the same setting
            # as the searcher; otherwise the instance silently keeps the
            # `__init__` default of `True`.
            ignore_errors=ignore_errors,
            loss_value_on_error=loss_value_on_error,
            cost_value_on_error=cost_value_on_error,
        )

    @override
    def ask(self) -> Trial[NEPSTrialInfo]:
        """Ask the optimizer for a new config.

        Returns:
            The trial info for the new config.
        """
        # Sample while the optimizer state file is in use.
        with self.optimizer.using_state(self.optimizer_state_file, self.serializer):
            (
                config_id,
                config,
                pipeline_directory,
                previous_pipeline_directory,
            ) = metahyper.api._sample_config(  # type: ignore
                optimization_dir=self.working_dir,
                sampler=self.optimizer,
                serializer=self.serializer,
                logger=logger,
            )

        # Reduce the sampled config to a plain mapping of values.
        if isinstance(config, SearchSpace):
            _config = config.hp_values()
        else:
            _config = {
                k: v.value if isinstance(v, Parameter) else v for k, v in config.items()
            }

        info = NEPSTrialInfo(
            name=str(config_id),
            config=deepcopy(_config),
            pipeline_directory=pipeline_directory,
            previous_pipeline_directory=previous_pipeline_directory,
        )
        trial = Trial(
            name=info.name,
            config=info.config,
            info=info,
            seed=None,
            bucket=self.bucket,
        )
        logger.debug(f"Asked for trial {trial.name}")
        return trial

    @override
    def tell(self, report: Trial.Report[NEPSTrialInfo]) -> None:
        """Tell the optimizer the result of the sampled config.

        The result and the updated metadata are written into the trial's
        pipeline directory.

        Args:
            report: The report of the trial.

        Raises:
            ValueError: If a successful trial did not report a `"loss"`, or
                did not report a `"cost"` while the optimizer has a budget.
        """
        logger.debug(f"Telling report for trial {report.trial.name}")
        info = report.info
        assert info is not None

        # This is how NEPS handles errors
        result: Literal["error"] | dict[str, Any]
        if report.status in (Trial.Status.CRASHED, Trial.Status.FAIL):
            result = "error"
        else:
            result = report.results

        metadata: dict[str, Any] = {"time_end": report.time.end}

        if result == "error":
            # The dumped result stays "error"; the fallback values are only
            # recorded on the report itself.
            if not self.ignore_errors:
                if self.loss_value_on_error is not None:
                    report.results["loss"] = self.loss_value_on_error
                if self.cost_value_on_error is not None:
                    report.results["cost"] = self.cost_value_on_error
        else:
            # `result` is `report.results` in this branch, so normalising the
            # value here also normalises what gets dumped below.
            if (loss := result.get("loss")) is not None:
                report.results["loss"] = float(loss)
            else:
                raise ValueError(
                    "The 'loss' should be provided if the trial is successful"
                    f"\n{result=}",
                )

            if (cost := result.get("cost")) is not None:
                cost = float(cost)
                result["cost"] = cost
                account_for_cost = result.get("account_for_cost", True)

                if account_for_cost:
                    with self.optimizer.using_state(
                        self.optimizer_state_file,
                        self.serializer,
                    ):
                        self.optimizer.used_budget += cost

                metadata["budget"] = {
                    "max": self.optimizer.budget,
                    "used": self.optimizer.used_budget,
                    "eval_cost": cost,
                    "account_for_cost": account_for_cost,
                }
            elif self.optimizer.budget is not None:
                raise ValueError(
                    "'cost' should be provided when the optimizer has a budget"
                    f"\n{result=}",
                )

        # Dump results
        self.serializer.dump(result, info.pipeline_directory / "result")

        # Load and dump metadata
        config_metadata = self.serializer.load(info.pipeline_directory / "metadata")
        config_metadata.update(metadata)
        self.serializer.dump(config_metadata, info.pipeline_directory / "metadata")

    @override
    @classmethod
    def preferred_parser(cls) -> NEPSPreferredParser:
        """The preferred parser for this optimizer."""
        # TODO: We might want a custom one for neps.SearchSpace, for now we will
        # use config space but without conditions as NePs doesn't support conditionals
        return partial(configspace_parser, conditionals=False)