# Copyright (C) 2023 ETH Zurich
# Institute for Particle Physics and Astrophysics
# Author: Silvan Fischbacher
# created: Mon Nov 06 2023
import numpy as np
import tensorflow as tf
from sklearn.base import BaseEstimator, ClassifierMixin
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.layers import Dropout
from edelweiss.tf_utils import EpochProgressCallback
class NeuralNetworkClassifier(BaseEstimator, ClassifierMixin):
"""
    Neural network classifier based on a Keras Sequential model
:param hidden_units: tuple/list, optional (default=(64, 32))
The number of units per hidden layer
:param learning_rate: float, optional (default=0.001)
The learning rate for the Adam optimizer
:param epochs: int, optional (default=10)
The number of epochs to train the model
:param batch_size: int, optional (default=32)
The batch size for training the model
:param loss: str, optional (default="auto")
The loss function to use, defaults to binary_crossentropy if binary and
sparse_categorical_crossentropy if multiclass
:param activation: str, optional (default="relu")
The activation function to use for the hidden layers
:param activation_output: str, optional (default="auto")
        The activation function to use for the output layer, defaults to sigmoid
        for binary and softmax for multiclass classification
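
    Example (a minimal sketch; ``X_train``, ``y_train`` and ``X_test`` are
    assumed to be numpy arrays provided by the caller)::

        >>> clf = NeuralNetworkClassifier(hidden_units=(16,), epochs=5)
        >>> clf.fit(X_train, y_train)
        >>> proba = clf.predict_proba(X_test)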
"""
def __init__(
self,
hidden_units=(64, 32),
learning_rate=0.001,
epochs=10,
batch_size=32,
loss="auto",
activation="relu",
activation_output="auto",
):
self.hidden_units = hidden_units
self.learning_rate = learning_rate
self.epochs = epochs
self.batch_size = batch_size
self.loss = loss
self.activation = activation
self.activation_output = activation_output
def fit(self, X, y, sample_weight=None, early_stopping_patience=10):
"""
Fit the neural network model
:param X: array-like, shape (n_samples, n_features)
The training input samples
:param y: array-like, shape (n_samples,)
The target values
:param sample_weight: array-like, shape (n_samples,), optional (default=None)
Sample weights
:param early_stopping_patience: int, optional (default=10)
The number of epochs with no improvement after which training will be
stopped
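
        Example (illustrative; assumes numpy arrays ``X`` and ``y``)::

            >>> clf.fit(X, y, sample_weight=np.ones(len(y)),
            ...         early_stopping_patience=5)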
"""
# Encode labels
self.label_encoder = LabelEncoder()
y_encoded = self.label_encoder.fit_transform(y)
self.classes_ = self.label_encoder.classes_
# Determine if it's binary or multiclass
self.n_classes_ = len(self.classes_)
self.is_binary_ = self.n_classes_ == 2
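        # (binary models get a single output unit, multiclass models one unit
        # per class; see _build_model)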
# Adjust loss and activation_output based on problem type
if self.loss == "auto":
self.loss_ = (
"binary_crossentropy"
if self.is_binary_
else "sparse_categorical_crossentropy"
)
else:
self.loss_ = self.loss
if self.activation_output == "auto":
self.activation_output_ = "sigmoid" if self.is_binary_ else "softmax"
else:
self.activation_output_ = self.activation_output
# Build the neural network model
model = self._build_model(X.shape[1])
# Compile the model
model.compile(
optimizer=tf.keras.optimizers.Adam(learning_rate=self.learning_rate),
loss=self.loss_,
metrics=["accuracy"],
)
# Add early stopping
early_stopping = EarlyStopping(
monitor="val_loss",
patience=early_stopping_patience,
restore_best_weights=True,
)
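        # val_loss is computed on the 20% validation split created below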
# Fit the model
model.fit(
X,
y_encoded,
sample_weight=sample_weight,
epochs=self.epochs,
batch_size=self.batch_size,
validation_split=0.2, # use 20% of the training data as validation data
callbacks=[early_stopping, EpochProgressCallback(total_epochs=self.epochs)],
verbose=0,
)
        self.model = model
        # return the fitted estimator, as the scikit-learn API expects
        return self
def _build_model(self, input_dim):
"""
Build the neural network model
:param input_dim: int
The number of input features
:return: tf.keras.Sequential
The built model
"""
model = tf.keras.Sequential()
model.add(tf.keras.layers.InputLayer(shape=(input_dim,)))
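        # Hidden stack: each Dense layer is followed by dropout for regularization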
for units in self.hidden_units:
model.add(tf.keras.layers.Dense(units, activation=self.activation))
model.add(Dropout(0.2))
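        # Output layer: a single unit for binary problems, one unit per class
        # otherwise (sigmoid/softmax by default, see activation_output)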
model.add(
tf.keras.layers.Dense(
1 if self.is_binary_ else self.n_classes_,
activation=self.activation_output_,
)
)
model.summary()
return model
def predict(self, X):
"""
Predict the class labels for the provided data
:param X: array-like, shape (n_samples, n_features)
The input samples
:return: array-like, shape (n_samples,)
The predicted class labels
"""
        y_prob = self.model.predict(X, verbose=0)
        if self.is_binary_:
            # binary models output a single probability, so argmax over one
            # column would always return 0; threshold at 0.5 instead
            y_pred = (y_prob.flatten() > 0.5).astype(int)
        else:
            y_pred = np.argmax(y_prob, axis=1)
        return self.label_encoder.inverse_transform(y_pred)
def predict_proba(self, X):
"""
Predict the class probabilities for the provided data
:param X: array-like, shape (n_samples, n_features)
The input samples
:return: array-like, shape (n_samples, n_classes)
The predicted class probabilities
"""
y_prob = self.model.predict(X, verbose=0)
        # for backwards compatibility: models saved before multiclass support
        # lack the is_binary_ attribute and are always binary
        if not hasattr(self, "is_binary_"):
            self.is_binary_ = True
        if self.is_binary_:
            # single sigmoid output p: return the columns [1 - p, p]
            y_prob = y_prob.flatten()
            return np.column_stack((1 - y_prob, y_prob))
else:
return y_prob
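

if __name__ == "__main__":
    # Minimal smoke test (illustrative sketch, not part of the library API):
    # fit on synthetic data to check that the classifier runs end to end.
    rng = np.random.default_rng(42)
    X = rng.normal(size=(200, 5)).astype("float32")
    y = (X[:, 0] > 0).astype(int)  # hypothetical binary labels
    clf = NeuralNetworkClassifier(hidden_units=(16,), epochs=3)
    clf.fit(X, y)
    print("predictions:", clf.predict(X[:5]))
    print("probabilities:\n", clf.predict_proba(X[:5]))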