Source code for mlgw_bns.neural_network

from __future__ import annotations

import logging
from abc import ABC, abstractmethod
from dataclasses import dataclass, field
from typing import IO, TYPE_CHECKING, Optional, Union

import joblib  # type: ignore
import numpy as np
import pkg_resources
from sklearn.neural_network import MLPRegressor  # type: ignore
from sklearn.preprocessing import StandardScaler  # type: ignore

if TYPE_CHECKING:
    import optuna
    import torch  # type: ignore
    import torch.utils.data as Data  # type: ignore
    from torch.autograd import Variable  # type: ignore

TRIALS_FILE = "data/best_trials.pkl"


[docs]@dataclass
class Hyperparameters:
    r"""Dataclass containing the parameters which are passed to
    the neural network for training, as well as a few more
    (:attr:`pc_exponent`) and (:attr:`n_train`).

    Parameters
    ----------
    pc_exponent: float
            Exponent to be used in the normalization of the
            principal components: the network learns to reconstruct
            :math:`x_i \lambda_i^\alpha`, where
            :math:`x_i` are the principal-component
            representation of a waveform, while
            :math:`\lambda_i` are the eigenvalues of the PCA
            and finally :math:`\alpha` is this parameter.
    n_train: float
            Number of waveforms to use in the training.
    hidden_layer_sizes: tuple[int, ...]
            Sizes of the layers in the neural network.
            For more details, refer to the documentation
            of the :class:`MLPRegressor`.
    activation: str
            Activation function.
            For more details, refer to the documentation
            of the :class:`MLPRegressor`.
    alpha: float
            Regularization parameter.
            For more details, refer to the documentation
            of the :class:`MLPRegressor`.
    batch_size: int
            For more details, refer to the documentation
            of the :class:`MLPRegressor`.
    learning_rate_init: float
            For more details, refer to the documentation
            of the :class:`MLPRegressor`.
    tol: float
            For more details, refer to the documentation
            of the :class:`MLPRegressor`.
    validation_fraction: float
            For more details, refer to the documentation
            of the :class:`MLPRegressor`.
    n_iter_no_change: float
            For more details, refer to the documentation
            of the :class:`MLPRegressor`.
    """

    # controls how much weight is give to higher principal components
    pc_exponent: float

    # number of training data points to use
    n_train: int

    # parameters for the sklearn neural network
    hidden_layer_sizes: tuple[int, ...]
    activation: str
    alpha: float
    batch_size: int
    learning_rate_init: float
    tol: float
    validation_fraction: float
    n_iter_no_change: float

    max_iter: int = field(default=1000)

    @property
    def nn_params(self) -> dict[str, Union[int, float, str, bool, tuple[int, ...]]]:
        """Return a dictionary which can be readily unpacked
        and used to initialize a :class:`MLPRegressor`.
        """

        return {
            "max_iter": self.max_iter,
            "hidden_layer_sizes": self.hidden_layer_sizes,
            "activation": self.activation,
            "alpha": self.alpha,
            "learning_rate_init": self.learning_rate_init,
            "tol": self.tol,
            "validation_fraction": self.validation_fraction,
            "n_iter_no_change": self.n_iter_no_change,
            "early_stopping": False,
            "shuffle": True,
            "batch_size": min(
                self.batch_size, int(self.n_train * (1 - self.validation_fraction))
            ),
        }

[docs]    @classmethod
    def from_trial(cls, trial: "optuna.Trial", n_train_max: int):
        """Generate the hyperparameter set starting from an
        :class:`optuna.Trial`.

        Parameters
        ----------
        trial : optuna.Trial
                Used to generate the parameters.
        n_train_max : int
                Upper bound for the attribute :attr:`n_train`.
        """

        n_layers = trial.suggest_int("n_layers", 1, 4)

        layers = tuple(
            trial.suggest_int(f"size_layer_{i}", 10, 200) for i in range(n_layers)
        )

        return cls(
            hidden_layer_sizes=layers,
            activation=str(
                trial.suggest_categorical("activation", ["relu", "tanh", "logistic"])
            ),
            alpha=trial.suggest_loguniform("alpha", 1e-6, 1e-1),  # Default: 1e-4
            batch_size=trial.suggest_int("batch_size", 100, 200),  # Default: 200
            learning_rate_init=trial.suggest_loguniform(
                "learning_rate_init", 2e-4, 5e-2
            ),  # Default: 1e-3
            tol=trial.suggest_loguniform("tol", 1e-15, 1e-7),  # default: 1e-4
            validation_fraction=trial.suggest_uniform("validation_fraction", 0.05, 0.2),
            n_iter_no_change=trial.suggest_int(
                "n_iter_no_change", 40, 100, log=True
            ),  # default: 10
            pc_exponent=trial.suggest_loguniform("pc_exponent", 1e-3, 1),
            n_train=trial.suggest_int("n_train", 50, n_train_max),
        )

    @classmethod
    def from_frozen_trial(cls, frozen_trial: "optuna.trial.FrozenTrial"):

        params = frozen_trial.params
        n_layers = params.pop("n_layers")

        layers = [params.pop(f"size_layer_{i}") for i in range(n_layers)]
        params["hidden_layer_sizes"] = tuple(layers)

        return cls(**params)

    @classmethod
    def default(cls, training_waveform_number: Optional[int] = None):

        try:
            if training_waveform_number is not None:
                best_trials = retrieve_best_trials_list()
                return best_trial_under_n(best_trials, training_waveform_number)
        except (FileNotFoundError, IndexError):
            pass

        return cls(
            hidden_layer_sizes=(50, 50),
            activation="relu",
            alpha=1e-4,
            batch_size=200,
            learning_rate_init=1e-3,
            tol=1e-9,
            n_iter_no_change=50,
            validation_fraction=0.1,
            pc_exponent=0.2,
            n_train=200,
        )


[docs]class NeuralNetwork(ABC):
    """Abstract class for a wrapper around
    a generic neural network.
    """

    def __init__(self, hyper: Hyperparameters):
        self.hyper = hyper

[docs]    @abstractmethod
    def fit(self, x_data: np.ndarray, y_data: np.ndarray) -> None:
        """Fit the network to the data.

        Parameters
        ----------
        x_data : np.ndarray
        y_data : np.ndarray
        """

[docs]    @abstractmethod
    def predict(self, x_data: np.ndarray) -> np.ndarray:
        """Make a prediction for a new set of data.

        Parameters
        ----------
        x_data : np.ndarray

        Returns
        -------
        np.ndarray
        """

[docs]    @abstractmethod
    def save(self, filename: str) -> None:
        """Save self to file.

        Parameters
        ----------
        filename : str
            File to save to.
        """

[docs]    @classmethod
    @abstractmethod
    def from_file(self, filename: Union[IO[bytes], str]):
        """Load object from file.

        Parameters
        ----------
        filename : str
            File to read

        Returns
        -------
        NeuralNetwork
        """


[docs]class SklearnNetwork(NeuralNetwork):
    """Wrapper for a MLPRegressor, the regressor provided by
    the library `scikit-learn <https://scikit-learn.org/stable/>`_.
    """

    def __init__(
        self,
        hyper: Hyperparameters,
        nn: Optional[MLPRegressor] = None,
        param_scaler: Optional[StandardScaler] = None,
    ):
        super().__init__(hyper=hyper)
        self.nn = MLPRegressor(**hyper.nn_params) if nn is None else nn
        if param_scaler is not None:
            self.param_scaler: StandardScaler = param_scaler

[docs]    def fit(self, x_data: np.ndarray, y_data: np.ndarray) -> None:
        self.param_scaler = StandardScaler().fit(x_data)

        old_batch_size = self.nn.batch_size
        self.nn.batch_size = min(self.nn.batch_size, x_data.shape[1])

        scaled_x = self.param_scaler.transform(x_data)
        self.nn.fit(scaled_x, y_data)

        self.nn.batch_size = old_batch_size

[docs]    def predict(self, x_data: np.ndarray) -> np.ndarray:
        scaled_x = self.param_scaler.transform(x_data)
        return self.nn.predict(scaled_x)

[docs]    def save(self, filename: str):
        joblib.dump((self.hyper, self.nn, self.param_scaler), filename)

[docs]    @classmethod
    def from_file(cls, filename: Union[IO[bytes], str]):
        return cls(*joblib.load(filename))


class TorchNetwork(NeuralNetwork):
    """Wrapper for a network using pytorch."""

    def __init__(
        self,
        hyper: Hyperparameters,
        nn: Optional["torch.nn.modules.container.Sequential"] = None,
    ) -> None:
        import torch  # type : ignore
        import torch.utils.data as Data  # type : ignore
        from torch.autograd import Variable  # type : ignore

        super().__init__(hyper=hyper)

        self.nn = nn if nn is not None else self.make_nn()

    def make_nn(
        self, size_in: int = 5, size_out: int = 30
    ) -> "torch.nn.modules.container.Sequential":
        activations = {
            "logistic": torch.nn.LogSigmoid(),
            "tanh": torch.nn.Tanh(),
            "relu": torch.nn.ReLU(),
        }
        activation = activations[self.hyper.activation]

        sizes_in = (size_in,) + self.hyper.hidden_layer_sizes
        sizes_out = self.hyper.hidden_layer_sizes + (size_out,)

        return torch.nn.Sequential(
            *(
                layer
                for s_in, s_out in zip(sizes_in, sizes_out)
                for layer in (activation, torch.nn.Linear(s_in, s_out))
            )
        )

    def fit(self, x_data: np.ndarray, y_data: np.ndarray) -> None:
        # TODO: add validation
        # from pytorch ignite maybe?
        # https://pytorch.org/ignite/generated/ignite.handlers.early_stopping.EarlyStopping.html

        x, y = Variable(x_data), Variable(y_data)

        torch_dataset = Data.TensorDataset(x, y)

        loader_iterable = iter(
            Data.DataLoader(
                dataset=torch_dataset, batch_size=self.hyper.batch_size, shuffle=True
            )
        )

        optimizer = torch.optim.Adam(
            self.nn.parameters(),
            lr=self.hyper.learning_rate_init,
            weight_decay=self.hyper.alpha,
        )

        loss_func = torch.nn.MSELoss()

        n_iters_no_improvement = 0
        previous_loss = np.inf

        while n_iters_no_improvement < self.hyper.n_iter_no_change:
            batch_x, batch_y = next(loader_iterable)

            b_x = Variable(batch_x)
            b_y = Variable(batch_y)

            prediction = self.nn(b_x)

            loss = loss_func(prediction, b_y)

            optimizer.zero_grad()  # clear gradients for next train
            loss.backward()  # backpropagation, compute gradients
            optimizer.step()  # apply gradients

            loss_improvement = previous_loss - loss.item()
            if abs(loss_improvement) > self.hyper.tol:
                n_iters_no_improvement = 0
            else:
                n_iters_no_improvement += 1

    def predict(self, x_data: np.ndarray) -> np.ndarray:
        x = Variable(x_data)

        return self.nn(x).numpy()

    def save(self, filename: str):
        joblib.dump((self.hyper, self.nn), filename)

    @classmethod
    def from_file(cls, filename: Union[IO[bytes], str]):
        return cls(*joblib.load(filename))


[docs]def retrieve_best_trials_list() -> "list[optuna.trial.FrozenTrial]":
    """Read the list of best trials which is provided
    with the package.

    This list's location can be found at the location
    defined by ``TRIALS_FILE``;
    if one wishes to modify it, a new one can be generated
    with the method :meth:`save_best_trials_to_file` of the
    class :class:`mlgw_bns.hyperparameter_optimization.HyperparameterOptimization`
    after an optimization job has been run.

    The current trials provided are the result of about 30 hours
    of optimization on a laptop.

    Returns
    -------
    list[optuna.trial.FrozenTrial]
        List of the trials in the Pareto front of an optimization.
    """

    stream = pkg_resources.resource_stream(__name__, TRIALS_FILE)
    return joblib.load(stream)


[docs]def best_trial_under_n(
    best_trials: "list[optuna.trial.FrozenTrial]", training_number: int
) -> Hyperparameters:
    """Utility function to retrieve
    a set of hyperparameters starting from a list of optimization trials.
    The best trial in terms of accuracy is returned.

    Parameters
    ----------
    best_trials : list[optuna.trial.FrozenTrial]
        List of trials in the Pareto front of an optimization run.
        A default value for such a list is the one provided by
        :func:`retrieve_best_trials_list`.
    training_number : int
        Return the best trial obtained while using less than this number
        of training waveforms.

    Returns
    -------
    Hyperparameters
        Hyperparameters corresponding to the best trial found.
    """

    accuracy = lambda trial: trial.values[0]

    # take the most accurate trial
    # which used less training data than the given
    # training number
    best_trial = sorted(
        [trial for trial in best_trials if trial.params["n_train"] <= training_number],
        key=accuracy,
    )[0]

    hyper = Hyperparameters.from_frozen_trial(best_trial)
    hyper.n_train = training_number
    return hyper