Usage: Regressor

from cosmic_toolbox import arraytools as at, file_utils
import numpy as np
from sklearn.model_selection import train_test_split

from edelweiss.regressor import (
    Regressor, load_regressor
)

# Seed NumPy's global random number generator so the example is reproducible.
np.random.seed(1996)
# Load the California housing dataset and split it into train and test sets.
from sklearn.datasets import fetch_california_housing

data = fetch_california_housing()
# Convert the feature matrix and the target into arrays with named fields;
# these names are what the regressor logs as its input and output parameters.
X = at.arr2rec(data.data, names=data["feature_names"])
# reshape(-1, 1) turns the 1D target vector into a single column
# (presumably so that arr2rec can attach the one target name to it).
y = at.arr2rec(data.target.reshape(-1,1), names=data["target_names"])
# Hold out 30% of the samples for testing; random_state fixes the split.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)
# Select the scaling method and the regressor.
# NOTE(review): "robust" and "XGB" presumably pick a robust scaler and an
# XGBoost model -- confirm the accepted values in the edelweiss documentation.
reg = Regressor(scaler="robust", reg="XGB")
# Fit on the training set, then evaluate on the held-out test set; the
# evaluation metrics (MAD, MSE, max error and their relative versions) are
# written to the log.
reg.train(X_train, y_train)
reg.test(X_test, y_test)
24-07-25 14:57:07 regressor. INF   Training regressor
24-07-25 14:57:07 regressor. INF   Input parameters: ('MedInc', 'HouseAge', 'AveRooms', 'AveBedrms', 'Population', 'AveOccup', 'Latitude', 'Longitude')
24-07-25 14:57:07 regressor. INF   Output parameters: ('MedHouseVal',)
24-07-25 14:57:07 regressor. INF   Number of training samples: 14448
24-07-25 14:57:07 regressor. INF   Testing regressor
24-07-25 14:57:07 regressor. INF   Number of test samples: 6192
24-07-25 14:57:07 regressor. INF   max MAD: 3.513500988704888
24-07-25 14:57:07 regressor. INF   max MSE: 13.657229305443849
24-07-25 14:57:07 regressor. INF   max Max error: 5.430479131469727
24-07-25 14:57:07 regressor. INF   max relative MAD: 1.6998331595014768
24-07-25 14:57:07 regressor. INF   max relative MSE: 2.4453858863188693
24-07-25 14:57:07 regressor. INF   max relative Max error: 1.0860936541066373
# The trained regressor can now be used for predictions, and it can be saved
# to disk and loaded again.
y_pred = reg.predict(X_test)  # calling reg(X_test) directly also works

# Round trip: save the regressor to the path "reg" (logged as a newly created
# directory), then load it back from the same path.
reg.save("reg")
reg = load_regressor("reg")
# Clean up the saved copy so the example leaves nothing behind.
# NOTE(review): robust_remove presumably deletes the whole "reg" directory --
# confirm in the cosmic_toolbox documentation.
file_utils.robust_remove("reg")
24-07-25 14:57:07 file_utils INF   Created directory reg
24-07-25 14:57:07 regressor. INF   Regressor saved to reg
24-07-25 14:57:07 regressor. INF   Regressor loaded from reg