Coverage for src/edelweiss/custom_clfs.py: 100%
51 statements
coverage.py v7.10.1, created at 2025-07-31 10:21 +0000
# Copyright (C) 2023 ETH Zurich
# Institute for Particle Physics and Astrophysics
# Author: Silvan Fischbacher
# created: Mon Nov 06 2023

import numpy as np
import tensorflow as tf
from sklearn.base import BaseEstimator, ClassifierMixin
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.layers import Dropout

from edelweiss.tf_utils import EpochProgressCallback
class NeuralNetworkClassifier(BaseEstimator, ClassifierMixin):
    """
    Neural network classifier based on a Keras Sequential model.

    :param hidden_units: tuple/list, optional (default=(64, 32))
        The number of units per hidden layer
    :param learning_rate: float, optional (default=0.001)
        The learning rate for the Adam optimizer
    :param epochs: int, optional (default=10)
        The number of epochs to train the model
    :param batch_size: int, optional (default=32)
        The batch size for training the model
    :param loss: str, optional (default="auto")
        The loss function to use; defaults to binary_crossentropy for binary
        problems and sparse_categorical_crossentropy for multiclass problems
    :param activation: str, optional (default="relu")
        The activation function to use for the hidden layers
    :param activation_output: str, optional (default="auto")
        The activation function to use for the output layer; defaults to
        sigmoid for binary problems and softmax for multiclass problems
    """
    def __init__(
        self,
        hidden_units=(64, 32),
        learning_rate=0.001,
        epochs=10,
        batch_size=32,
        loss="auto",
        activation="relu",
        activation_output="auto",
    ):
        self.hidden_units = hidden_units
        self.learning_rate = learning_rate
        self.epochs = epochs
        self.batch_size = batch_size
        self.loss = loss
        self.activation = activation
        self.activation_output = activation_output
        self.model = None
    def fit(self, X, y, sample_weight=None, early_stopping_patience=10):
        """
        Fit the neural network model.

        :param X: array-like, shape (n_samples, n_features)
            The training input samples
        :param y: array-like, shape (n_samples,)
            The target values
        :param sample_weight: array-like, shape (n_samples,), optional (default=None)
            Sample weights
        :param early_stopping_patience: int, optional (default=10)
            The number of epochs with no improvement after which training will
            be stopped
        """
        # Encode labels
        self.label_encoder = LabelEncoder()
        y_encoded = self.label_encoder.fit_transform(y)
        self.classes_ = self.label_encoder.classes_

        # Determine if the problem is binary or multiclass
        self.n_classes_ = len(self.classes_)
        self.is_binary_ = self.n_classes_ == 2

        # Adjust loss and activation_output based on the problem type
        if self.loss == "auto":
            self.loss_ = (
                "binary_crossentropy"
                if self.is_binary_
                else "sparse_categorical_crossentropy"
            )
        else:
            self.loss_ = self.loss
        if self.activation_output == "auto":
            self.activation_output_ = "sigmoid" if self.is_binary_ else "softmax"
        else:
            self.activation_output_ = self.activation_output

        # Build the neural network model
        self._build_model(X.shape[1])

        # Compile the model
        self.model.compile(
            optimizer=tf.keras.optimizers.Adam(learning_rate=self.learning_rate),
            loss=self.loss_,
            metrics=["accuracy"],
        )

        # Add early stopping
        early_stopping = EarlyStopping(
            monitor="val_loss",
            patience=early_stopping_patience,
            restore_best_weights=True,
        )

        # Fit the model
        self.model.fit(
            X,
            y_encoded,
            sample_weight=sample_weight,
            epochs=self.epochs,
            batch_size=self.batch_size,
            validation_split=0.2,  # use 20% of the training data as validation data
            callbacks=[early_stopping, EpochProgressCallback(total_epochs=self.epochs)],
            verbose=0,
        )
    def _build_model(self, input_dim):
        """
        Build the neural network model.

        :param input_dim: int
            The number of input features
        """
        self.model = tf.keras.Sequential()
        try:
            self.model.add(tf.keras.layers.InputLayer(shape=(input_dim,)))
        except Exception:  # pragma: no cover
            # backwards compatibility for tf<2.16
            self.model.add(tf.keras.layers.InputLayer(input_shape=(input_dim,)))
        for units in self.hidden_units:
            self.model.add(tf.keras.layers.Dense(units, activation=self.activation))
            self.model.add(Dropout(0.2))
        self.model.add(
            tf.keras.layers.Dense(
                1 if self.is_binary_ else self.n_classes_,
                activation=self.activation_output_,
            )
        )
        self.model.summary()
    def predict(self, X):
        """
        Predict the class labels for the provided data.

        :param X: array-like, shape (n_samples, n_features)
            The input samples
        :return: array-like, shape (n_samples,)
            The predicted class labels
        """
        y_prob = self.model.predict(X, verbose=0)
        if self.is_binary_:
            # The binary model has a single sigmoid output of shape
            # (n_samples, 1); argmax over axis 1 would always return 0,
            # so threshold the probability at 0.5 instead.
            y_pred = (y_prob.flatten() > 0.5).astype(int)
        else:
            y_pred = np.argmax(y_prob, axis=1)
        return self.label_encoder.inverse_transform(y_pred)
    def predict_proba(self, X):
        """
        Predict the class probabilities for the provided data.

        :param X: array-like, shape (n_samples, n_features)
            The input samples
        :return: array-like, shape (n_samples, n_classes)
            The predicted class probabilities
        """
        y_prob = self.model.predict(X, verbose=0)

        # for backwards compatibility
        if not hasattr(self, "is_binary_"):  # pragma: no cover
            self.is_binary_ = True

        if self.is_binary_:
            y_prob = y_prob.flatten()
            return np.column_stack((1 - y_prob, y_prob))
        else:
            return y_prob