Usage: Regressor
from cosmic_toolbox import arraytools as at, file_utils
import numpy as np
from sklearn.model_selection import train_test_split
from edelweiss.regressor import (
Regressor, load_regressor
)
# Seed NumPy's global random number generator so the example is repeatable.
np.random.seed(1996)

# Fetch the California housing dataset and convert features and target
# with at.arr2rec, reusing the column names shipped with the dataset.
from sklearn.datasets import fetch_california_housing

data = fetch_california_housing()
feature_names = data["feature_names"]
target_names = data["target_names"]
X = at.arr2rec(data.data, names=feature_names)
# The target is reshaped to a single column before conversion.
target_column = data.target.reshape(-1, 1)
y = at.arr2rec(target_column, names=target_names)

# Keep 30% of the samples aside for testing; the split itself is seeded.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)

# Pick the scaling method and the regressor type, then train and test.
reg = Regressor(scaler="robust", reg="XGB")
reg.train(X_train, y_train)
reg.test(X_test, y_test)
24-07-25 14:57:07 regressor. INF Training regressor
24-07-25 14:57:07 regressor. INF Input parameters: ('MedInc', 'HouseAge', 'AveRooms', 'AveBedrms', 'Population', 'AveOccup', 'Latitude', 'Longitude')
24-07-25 14:57:07 regressor. INF Output parameters: ('MedHouseVal',)
24-07-25 14:57:07 regressor. INF Number of training samples: 14448
24-07-25 14:57:07 regressor. INF Testing regressor
24-07-25 14:57:07 regressor. INF Number of test samples: 6192
24-07-25 14:57:07 regressor. INF max MAD: 3.513500988704888
24-07-25 14:57:07 regressor. INF max MSE: 13.657229305443849
24-07-25 14:57:07 regressor. INF max Max error: 5.430479131469727
24-07-25 14:57:07 regressor. INF max relative MAD: 1.6998331595014768
24-07-25 14:57:07 regressor. INF max relative MSE: 2.4453858863188693
24-07-25 14:57:07 regressor. INF max relative Max error: 1.0860936541066373
# Once trained, the regressor can make predictions and can be saved to
# disk and loaded again.
y_pred = reg.predict(X_test)  # reg(X_test) can be used as well
save_path = "reg"
reg.save(save_path)
reg = load_regressor(save_path)
# Remove what was just saved so the example leaves nothing behind.
file_utils.robust_remove(save_path)
24-07-25 14:57:07 file_utils INF Created directory reg
24-07-25 14:57:07 regressor. INF Regressor saved to reg
24-07-25 14:57:07 regressor. INF Regressor loaded from reg