From a1763694bb1b8ddf911b287ce176592c8e0360d3 Mon Sep 17 00:00:00 2001 From: Atanas Dimitrov Date: Thu, 1 Aug 2024 00:32:35 +0300 Subject: [PATCH 01/17] Init --- include/LightGBM/c_api.h | 17 ++++++ python-package/lightgbm/__init__.py | 3 +- python-package/lightgbm/basic.py | 76 ++++++++++++++++++++++++ src/c_api.cpp | 53 ++++++++++++++++- src/io/config_auto.cpp | 2 +- src/objective/multiclass_objective.hpp | 1 + src/objective/regression_objective.hpp | 1 + tests/python_package_test/test_engine.py | 48 +++++++++++++++ tests/python_package_test/utils.py | 11 ++++ 9 files changed, 209 insertions(+), 3 deletions(-) diff --git a/include/LightGBM/c_api.h b/include/LightGBM/c_api.h index b43f096c31ee..a6b8f34dd525 100644 --- a/include/LightGBM/c_api.h +++ b/include/LightGBM/c_api.h @@ -31,6 +31,7 @@ typedef void* DatasetHandle; /*!< \brief Handle of dataset. */ typedef void* BoosterHandle; /*!< \brief Handle of booster. */ typedef void* FastConfigHandle; /*!< \brief Handle of FastConfig. */ typedef void* ByteBufferHandle; /*!< \brief Handle of ByteBuffer. */ +typedef void* ObjectiveFunctionHandle; /*!< \brief Handle of ObjectiveFunction. */ #define C_API_DTYPE_FLOAT32 (0) /*!< \brief float32 (single precision float). */ #define C_API_DTYPE_FLOAT64 (1) /*!< \brief float64 (double precision float). */ @@ -1563,6 +1564,22 @@ LIGHTGBM_C_EXPORT int LGBM_BoosterGetUpperBoundValue(BoosterHandle handle, LIGHTGBM_C_EXPORT int LGBM_BoosterGetLowerBoundValue(BoosterHandle handle, double* out_results); +/*! + */ +LIGHTGBM_C_EXPORT int LGBM_ObjectiveFunctionCreate(const char *typ, + const char *parameter, + ObjectiveFunctionHandle *out); + +/*! + */ +LIGHTGBM_C_EXPORT int LGBM_ObjectiveFunctionInit(ObjectiveFunctionHandle handle, + int *num_data, + DatasetHandle dataset); + +/*! + */ +LIGHTGBM_C_EXPORT int LGBM_ObjectiveFunctionFree(ObjectiveFunctionHandle handle); + /*! * \brief Initialize the network. 
* \param machines List of machines in format 'ip1:port1,ip2:port2' diff --git a/python-package/lightgbm/__init__.py b/python-package/lightgbm/__init__.py index 600f71284159..de9ec5fc2232 100644 --- a/python-package/lightgbm/__init__.py +++ b/python-package/lightgbm/__init__.py @@ -6,7 +6,7 @@ from pathlib import Path -from .basic import Booster, Dataset, Sequence, register_logger +from .basic import Booster, Dataset, Sequence, ObjectiveFunction, register_logger from .callback import EarlyStopException, early_stopping, log_evaluation, record_evaluation, reset_parameter from .engine import CVBooster, cv, train @@ -31,6 +31,7 @@ __all__ = [ "Dataset", "Booster", + "ObjectiveFunction", "CVBooster", "Sequence", "register_logger", diff --git a/python-package/lightgbm/basic.py b/python-package/lightgbm/basic.py index af4d757f480b..dc839ed4f1ee 100644 --- a/python-package/lightgbm/basic.py +++ b/python-package/lightgbm/basic.py @@ -5281,3 +5281,79 @@ def __get_eval_info(self) -> None: self.__higher_better_inner_eval = [ name.startswith(("auc", "ndcg@", "map@", "average_precision")) for name in self.__name_inner_eval ] + + +class ObjectiveFunction: + def __init__(self, name: str, params: Dict[str, Any]): + self.name = name + self.params = params + self.num_data = None + self.num_class = params.get("num_class", 1) + + self.__create() + + def init(self, dataset: Dataset) -> "ObjectiveFunction": + return self.__init_from_dataset(dataset) + + def __create(self): + self._handle = ctypes.c_void_p() + _safe_call( + _LIB.LGBM_ObjectiveFunctionCreate( + _c_str(self.name), + _c_str(_param_dict_to_str(self.params)), + ctypes.byref(self._handle), + ) + ) + + def __init_from_dataset(self, dataset: Dataset) -> "ObjectiveFunction": + if dataset._handle is None: + raise ValueError("Cannot create ObjectiveFunction from uninitialised Dataset") + + if self._handle is None: + raise ValueError("Dealocated ObjectiveFunction cannot be initialized") + + ref_dataset = dataset._handle + tmp_num_data 
= ctypes.c_int(0) + _safe_call( + _LIB.LGBM_ObjectiveFunctionInit( + self._handle, + ctypes.byref(tmp_num_data), + dataset._handle, + ) + ) + self.num_data = tmp_num_data.value + return self + + def __del__(self) -> None: + try: + self._free_handle() + except AttributeError: + pass + + def _free_handle(self) -> "ObjectiveFunction": + if self._handle is not None: + _safe_call(_LIB.LGBM_ObjectiveFunctionFree(self._handle)) + self._handle = None + return self + + def __call__(self, y_pred: np.ndarray) -> Tuple[np.ndarray, np.ndarray]: + if self._handle is None: + raise ValueError("Objective function seems uninitialized") + + if self.num_data is None or self.num_class is None: + # TODO: Be more descriptive + raise ValueError("ObjectiveFunction was not created properly") + + grad = np.zeros(dtype=np.float32, shape=self.num_data * self.num_class) + hess = np.zeros(dtype=np.float32, shape=self.num_data * self.num_class) + + _safe_call( + _LIB.LGBM_ObjectiveFunctionEval( + self._handle, + y_pred.ctypes.data_as(ctypes.POINTER(ctypes.c_double)), + grad.ctypes.data_as(ctypes.POINTER(ctypes.c_float)), + hess.ctypes.data_as(ctypes.POINTER(ctypes.c_float)), + ) + ) + + return (grad, hess) diff --git a/src/c_api.cpp b/src/c_api.cpp index 98748bc9ff2f..cefa17c3dc36 100644 --- a/src/c_api.cpp +++ b/src/c_api.cpp @@ -27,6 +27,7 @@ #include #include #include +#include #include "application/predictor.hpp" #include @@ -43,7 +44,8 @@ inline int LGBM_APIHandleException(const std::string& ex) { return -1; } -#define API_BEGIN() try { +#define API_BEGIN() std::ofstream outf("logs.txt", std::ios_base::app); \ + try { #define API_END() } \ catch(std::exception& ex) { return LGBM_APIHandleException(ex); } \ catch(std::string& ex) { return LGBM_APIHandleException(ex); } \ @@ -907,6 +909,7 @@ using LightGBM::kZeroThreshold; using LightGBM::LGBM_APIHandleException; using LightGBM::Log; using LightGBM::Network; +using LightGBM::ObjectiveFunction; using LightGBM::Random; using 
LightGBM::ReduceScatterFunction; using LightGBM::SingleRowPredictor; @@ -2587,6 +2590,7 @@ int LGBM_BoosterPredictForMats(BoosterHandle handle, int64_t* out_len, double* out_result) { API_BEGIN(); + outf << parameter << std::endl; auto param = Config::Str2Map(parameter); Config config; config.Set(param); @@ -2747,6 +2751,53 @@ int LGBM_BoosterGetLowerBoundValue(BoosterHandle handle, API_END(); } +LIGHTGBM_C_EXPORT int LGBM_ObjectiveFunctionCreate(const char *typ, + const char *parameter, + ObjectiveFunctionHandle *out) { + API_BEGIN(); + auto param = Config::Str2Map(parameter); + Config config(param); + *out = ObjectiveFunction::CreateObjectiveFunction(std::string(typ), config); + outf << parameter << std::endl; + API_END(); +} + +LIGHTGBM_C_EXPORT int LGBM_ObjectiveFunctionInit(ObjectiveFunctionHandle handle, + int *num_data, + DatasetHandle dataset) { + API_BEGIN(); + ObjectiveFunction* ref_fobj = reinterpret_cast(handle); + Dataset* ref_dataset = reinterpret_cast(dataset); + ref_fobj->Init(ref_dataset->metadata(), ref_dataset->num_data()); + *num_data = ref_dataset->num_data(); + API_END(); +} + +LIGHTGBM_C_EXPORT int LGBM_ObjectiveFunctionEval(ObjectiveFunctionHandle handle, + const double* score, + float* grad, + float* hess) { + API_BEGIN(); + #ifdef SCORE_T_USE_DOUBLE + (void) handle; // UNUSED VARIABLE + (void) grad; // UNUSED VARIABLE + (void) hess; // UNUSED VARIABLE + Log::Fatal("Don't support evaluating objective function when SCORE_T_USE_DOUBLE is enabled"); + #else + ObjectiveFunction* ref_fobj = reinterpret_cast(handle); + ref_fobj->GetGradients(score, grad, hess); + #endif + API_END(); +} + +/*! 
+ */ +LIGHTGBM_C_EXPORT int LGBM_ObjectiveFunctionFree(ObjectiveFunctionHandle handle) { + API_BEGIN(); + delete reinterpret_cast(handle); + API_END(); +} + int LGBM_NetworkInit(const char* machines, int local_listen_port, int listen_time_out, diff --git a/src/io/config_auto.cpp b/src/io/config_auto.cpp index ca4fda1c3d4c..1e4b30bb05c3 100644 --- a/src/io/config_auto.cpp +++ b/src/io/config_auto.cpp @@ -326,7 +326,6 @@ const std::unordered_set& Config::parameter_set() { } void Config::GetMembersFromString(const std::unordered_map& params) { - std::string tmp_str = ""; GetString(params, "data", &data); if (GetString(params, "valid", &tmp_str)) { @@ -588,6 +587,7 @@ void Config::GetMembersFromString(const std::unordered_map Date: Thu, 1 Aug 2024 01:07:45 +0300 Subject: [PATCH 02/17] Fix build errors --- include/LightGBM/c_api.h | 7 +++++++ src/io/config_auto.cpp | 2 +- tests/python_package_test/test_engine.py | 1 - 3 files changed, 8 insertions(+), 2 deletions(-) diff --git a/include/LightGBM/c_api.h b/include/LightGBM/c_api.h index a6b8f34dd525..f74fab400e99 100644 --- a/include/LightGBM/c_api.h +++ b/include/LightGBM/c_api.h @@ -1576,6 +1576,13 @@ LIGHTGBM_C_EXPORT int LGBM_ObjectiveFunctionInit(ObjectiveFunctionHandle handle, int *num_data, DatasetHandle dataset); +/*! + */ +LIGHTGBM_C_EXPORT int LGBM_ObjectiveFunctionEval(ObjectiveFunctionHandle handle, + const double* score, + float* grad, + float* hess); + /*! 
*/ LIGHTGBM_C_EXPORT int LGBM_ObjectiveFunctionFree(ObjectiveFunctionHandle handle); diff --git a/src/io/config_auto.cpp b/src/io/config_auto.cpp index 1e4b30bb05c3..ca4fda1c3d4c 100644 --- a/src/io/config_auto.cpp +++ b/src/io/config_auto.cpp @@ -326,6 +326,7 @@ const std::unordered_set& Config::parameter_set() { } void Config::GetMembersFromString(const std::unordered_map& params) { + std::string tmp_str = ""; GetString(params, "data", &data); if (GetString(params, "valid", &tmp_str)) { @@ -587,7 +588,6 @@ void Config::GetMembersFromString(const std::unordered_map Date: Thu, 1 Aug 2024 17:18:23 +0300 Subject: [PATCH 03/17] Fix multiclass --- python-package/lightgbm/basic.py | 6 +- src/objective/multiclass_objective.hpp | 1 - tests/python_package_test/test_engine.py | 73 ++++++++++-------------- tests/python_package_test/utils.py | 31 +++++++--- 4 files changed, 55 insertions(+), 56 deletions(-) diff --git a/python-package/lightgbm/basic.py b/python-package/lightgbm/basic.py index dc839ed4f1ee..bcd9d84d481c 100644 --- a/python-package/lightgbm/basic.py +++ b/python-package/lightgbm/basic.py @@ -5341,11 +5341,11 @@ def __call__(self, y_pred: np.ndarray) -> Tuple[np.ndarray, np.ndarray]: raise ValueError("Objective function seems uninitialized") if self.num_data is None or self.num_class is None: - # TODO: Be more descriptive raise ValueError("ObjectiveFunction was not created properly") - grad = np.zeros(dtype=np.float32, shape=self.num_data * self.num_class) - hess = np.zeros(dtype=np.float32, shape=self.num_data * self.num_class) + data_shape = self.num_data * self.num_class + grad = np.zeros(dtype=np.float32, shape=data_shape) + hess = np.zeros(dtype=np.float32, shape=data_shape) _safe_call( _LIB.LGBM_ObjectiveFunctionEval( diff --git a/src/objective/multiclass_objective.hpp b/src/objective/multiclass_objective.hpp index 0325c427c34e..b2a49a9a40ca 100644 --- a/src/objective/multiclass_objective.hpp +++ b/src/objective/multiclass_objective.hpp @@ -25,7 +25,6 @@ 
class MulticlassSoftmax: public ObjectiveFunction { public: explicit MulticlassSoftmax(const Config& config) { num_class_ = config.num_class; - std::cout << "We have set " << num_class_ << std::endl; // This factor is to rescale the redundant form of K-classification, to the non-redundant form. // In the traditional settings of K-classification, there is one redundant class, whose output is set to 0 (like the class 0 in binary classification). // This is from the Friedman GBDT paper. diff --git a/tests/python_package_test/test_engine.py b/tests/python_package_test/test_engine.py index 637a0a91f221..90e0305bb23b 100644 --- a/tests/python_package_test/test_engine.py +++ b/tests/python_package_test/test_engine.py @@ -33,7 +33,7 @@ make_synthetic_regression, mse_obj, pickle_and_unpickle_object, - sklearn_multiclass_custom_objective, + multiclass_custom_objective, softmax, ) @@ -2927,12 +2927,6 @@ def test_default_objective_and_metric(): @pytest.mark.parametrize("use_weight", [True, False]) def test_multiclass_custom_objective(use_weight): - def custom_obj(y_pred, ds): - y_true = ds.get_label() - weight = ds.get_weight() - grad, hess = sklearn_multiclass_custom_objective(y_true, y_pred, weight) - return grad, hess - centers = [[-4, -4], [4, 4], [-4, 4]] X, y = make_blobs(n_samples=1_000, centers=centers, random_state=42) weight = np.full_like(y, 2) @@ -4400,47 +4394,38 @@ def test_quantized_training(): assert quant_rmse < rmse + 6.0 @pytest.mark.parametrize("use_weight", [False, True]) -def test_objective_function_regression(use_weight): - X, y = make_synthetic_regression() - weight = np.random.choice([1, 2], len(X)) if use_weight else None - lgb_train = lgb.Dataset(X, y, weight=weight, init_score=np.zeros(len(X))) - - params = {"verbose": -1, "objective": "regression"} - builtin_loss = builtin_objective("multiclass", copy.deepcopy(params)) - - booster = lgb.train(params, lgb_train, num_boost_round=20) - params["objective"] = mse_obj - booster_custom = lgb.train(params, 
lgb_train, num_boost_round=20) - params["objective"] = builtin_loss - booster_exposed = lgb.train(params, lgb_train, num_boost_round=20) - np.testing.assert_allclose(booster_exposed.predict(X), booster.predict(X)) - np.testing.assert_allclose(booster_exposed.predict(X), booster_custom.predict(X)) - - y_pred = booster.predict(X) - np.testing.assert_allclose(builtin_loss(y_pred, lgb_train), mse_obj(y_pred, lgb_train)) - -@pytest.mark.parametrize("use_weight", [False, True]) -def test_objective_function_multiclass(use_weight): - def custom_obj(y_pred, ds): - y_true = ds.get_label() - weight = ds.get_weight() - grad, hess = sklearn_multiclass_custom_objective(y_true, y_pred, weight) - return grad, hess - - X, y = make_blobs(n_samples=1_000, centers=[[-4, -4], [4, 4], [-4, 4]], random_state=42) +@pytest.mark.parametrize("test_data", [ + { + "custom_objective": mse_obj, + "objective_name": "regression", + "df": make_synthetic_regression(), + "num_class": 1 + }, + { + "custom_objective": multiclass_custom_objective, + "objective_name": "multiclass", + "df": make_blobs(n_samples=100, centers=[[-4, -4], [4, 4], [-4, 4]], random_state=42), + "num_class": 3 + }, +]) +@pytest.mark.parametrize("num_boost_round", [5, 15]) +def test_objective_function_multiclass(use_weight, test_data, num_boost_round): + X, y = test_data["df"] weight = np.random.choice([1, 2], y.shape) if use_weight else None - lgb_train = lgb.Dataset(X, y, weight=weight, init_score=np.zeros((len(y), 3))) + lgb_train = lgb.Dataset(X, y, weight=weight, init_score=np.zeros((len(y), test_data["num_class"]))) + + params = {"verbose": -1, "objective": test_data["objective_name"], "num_class": test_data["num_class"]} + builtin_loss = builtin_objective(test_data["objective_name"], copy.deepcopy(params)) - params = {"verbose": -1, "objective": "multiclass", "num_class": 3} - builtin_loss = builtin_objective("multiclass", copy.deepcopy(params)) - booster = lgb.train(params, lgb_train, num_boost_round=20) - 
params["objective"] = custom_obj - booster_custom = lgb.train(params, lgb_train, num_boost_round=20) params["objective"] = builtin_loss - booster_exposed = lgb.train(params, lgb_train, num_boost_round=20) + booster_exposed = lgb.train(params, lgb_train, num_boost_round=num_boost_round) + params["objective"] = test_data["objective_name"] + booster = lgb.train(params, lgb_train, num_boost_round=num_boost_round) + params["objective"] = test_data["custom_objective"] + booster_custom = lgb.train(params, lgb_train, num_boost_round=num_boost_round) np.testing.assert_allclose(booster_exposed.predict(X), booster.predict(X, raw_score=True)) np.testing.assert_allclose(booster_exposed.predict(X), booster_custom.predict(X)) - y_pred = booster.predict(X, raw_score=True) - np.testing.assert_allclose(builtin_loss(y_pred, lgb_train), mse_obj(y_pred, lgb_train)) + y_pred = np.zeros_like(booster.predict(X, raw_score=True)) + np.testing.assert_allclose(builtin_loss(y_pred, lgb_train), test_data["custom_objective"](y_pred, lgb_train)) diff --git a/tests/python_package_test/utils.py b/tests/python_package_test/utils.py index e003a0ef2d68..bd9f1d82c932 100644 --- a/tests/python_package_test/utils.py +++ b/tests/python_package_test/utils.py @@ -161,6 +161,29 @@ def sklearn_multiclass_custom_objective(y_true, y_pred, weight=None): return grad, hess +def multiclass_custom_objective(y_pred, ds): + y_true = ds.get_label() + weight = ds.get_weight() + grad, hess = sklearn_multiclass_custom_objective(y_true, y_pred, weight) + return grad, hess + + +def builtin_objective(name, params): + """Mimics the builtin objective functions to mock training. 
+ """ + def wrapper(y_pred, dtrain): + fobj = lgb.ObjectiveFunction(name, params) + fobj.init(dtrain) + (grad, hess) = fobj(y_pred) + print(grad, hess) + if fobj.num_class != 1: + grad = grad.reshape((fobj.num_class, -1)).transpose() + hess = hess.reshape((fobj.num_class, -1)).transpose() + print(grad, hess) + return (grad, hess) + return wrapper + + def pickle_obj(obj, filepath, serializer): if serializer == "pickle": with open(filepath, "wb") as f: @@ -194,14 +217,6 @@ def pickle_and_unpickle_object(obj, serializer): return obj_from_disk # noqa: RET504 -def builtin_objective(name, params): - def wrapper(y_pred, dtrain): - fobj = lgb.ObjectiveFunction(name, params) - fobj.init(dtrain) - return fobj(y_pred) - return wrapper - - # doing this here, at import time, to ensure it only runs once_per import # instead of once per assertion _numpy_testing_supports_strict_kwarg = "strict" in getfullargspec(np.testing.assert_array_equal).kwonlyargs From 60f95f83e4a0dc1e10a81460ef998a243803601a Mon Sep 17 00:00:00 2001 From: Atanas Dimitrov Date: Thu, 1 Aug 2024 21:03:49 +0300 Subject: [PATCH 04/17] Add comments and prepare for PR --- include/LightGBM/c_api.h | 23 +++++- python-package/lightgbm/basic.py | 95 ++++++++++++++++++------ src/c_api.cpp | 4 +- tests/python_package_test/test_engine.py | 2 +- 4 files changed, 96 insertions(+), 28 deletions(-) diff --git a/include/LightGBM/c_api.h b/include/LightGBM/c_api.h index f74fab400e99..872a3ec355b0 100644 --- a/include/LightGBM/c_api.h +++ b/include/LightGBM/c_api.h @@ -1565,18 +1565,34 @@ LIGHTGBM_C_EXPORT int LGBM_BoosterGetLowerBoundValue(BoosterHandle handle, double* out_results); /*! + * \brief Create an objective function. 
+ * \param typ Type of the objective function + * \param parameter Parameters for the objective function + * \param[out] out Handle pointing to the created objective function + * \return 0 when succeed, -1 when failure happens */ LIGHTGBM_C_EXPORT int LGBM_ObjectiveFunctionCreate(const char *typ, const char *parameter, ObjectiveFunctionHandle *out); /*! + * \brief Initialize an objective function with the dataset. + * \param handle Handle of the objective function + * \param dataset Handle of the dataset used for initialization + * \param[out] num_data Number of data points; this may be modified within the function + * \return 0 when succeed, -1 when failure happens */ LIGHTGBM_C_EXPORT int LGBM_ObjectiveFunctionInit(ObjectiveFunctionHandle handle, - int *num_data, - DatasetHandle dataset); + DatasetHandle dataset, + int *num_data); /*! + * \brief Evaluate the objective function given model scores. + * \param handle Handle of the objective function + * \param score Array of scores predicted by the model + * \param[out] grad Gradient result array + * \param[out] hess Hessian result array + * \return 0 when succeed, -1 when failure happens */ LIGHTGBM_C_EXPORT int LGBM_ObjectiveFunctionEval(ObjectiveFunctionHandle handle, const double* score, @@ -1584,6 +1600,9 @@ LIGHTGBM_C_EXPORT int LGBM_ObjectiveFunctionEval(ObjectiveFunctionHandle handle, float* hess); /*! + * \brief Free the memory allocated for an objective function. + * \param handle Handle of the objective function + * \return 0 when succeed, -1 when failure happens */ LIGHTGBM_C_EXPORT int LGBM_ObjectiveFunctionFree(ObjectiveFunctionHandle handle); diff --git a/python-package/lightgbm/basic.py b/python-package/lightgbm/basic.py index bcd9d84d481c..1ed0b166e5fe 100644 --- a/python-package/lightgbm/basic.py +++ b/python-package/lightgbm/basic.py @@ -5284,17 +5284,88 @@ def __get_eval_info(self) -> None: class ObjectiveFunction: + """ + ObjectiveFunction in LightGBM. 
+ + This class exposes the builtin objective functions for evaluating gradients and hessians + on external datasets. LightGBM does not use this wrapper during its training as it is + using the underlying C++ class. + """ + def __init__(self, name: str, params: Dict[str, Any]): + """ + Initialize the ObjectiveFunction. + + Parameters + ---------- + name : str + The name of the objective function. + params : dict + Dictionary of parameters for the objective function. + These are the parameters that would have been passed to ``booster.train``. + The ``name`` should be consistent with the ``params["objective"]`` field. + """ self.name = name self.params = params self.num_data = None self.num_class = params.get("num_class", 1) + if "objective" in params and params["objective"] != self.name: + raise ValueError("The name should be consistent with the params[\"objective\"] field.") + self.__create() def init(self, dataset: Dataset) -> "ObjectiveFunction": + """ + Initialize the objective function using the provided dataset. + + Parameters + ---------- + dataset : Dataset + The dataset object used for initialization. + + Returns + ------- + self : ObjectiveFunction + Initialized objective function object. + """ return self.__init_from_dataset(dataset) + def __call__(self, y_pred: np.ndarray) -> Tuple[np.ndarray, np.ndarray]: + """ + Evaluate the objective function given model predictions. + + Parameters + ---------- + y_pred : numpy.ndarray + Predicted scores from the model. + + Returns + ------- + (grad, hess) : Tuple[np.ndarray, np.ndarray] + A tuple containing gradients and Hessians. 
+ """ + if self._handle is None: + raise ValueError("Objective function seems uninitialized") + + if self.num_data is None or self.num_class is None: + raise ValueError("ObjectiveFunction was not created properly") + + data_shape = self.num_data * self.num_class + grad = np.zeros(dtype=np.float32, shape=data_shape) + hess = np.zeros(dtype=np.float32, shape=data_shape) + + _safe_call( + _LIB.LGBM_ObjectiveFunctionEval( + self._handle, + y_pred.ctypes.data_as(ctypes.POINTER(ctypes.c_double)), + grad.ctypes.data_as(ctypes.POINTER(ctypes.c_float)), + hess.ctypes.data_as(ctypes.POINTER(ctypes.c_float)), + ) + ) + + return (grad, hess) + def __create(self): self._handle = ctypes.c_void_p() _safe_call( @@ -5317,8 +5388,8 @@ def __init_from_dataset(self, dataset: Dataset) -> "ObjectiveFunction": _safe_call( _LIB.LGBM_ObjectiveFunctionInit( self._handle, - ctypes.byref(tmp_num_data), dataset._handle, + ctypes.byref(tmp_num_data), ) ) self.num_data = tmp_num_data.value @@ -5335,25 +5406,3 @@ def _free_handle(self) -> "ObjectiveFunction": _safe_call(_LIB.LGBM_ObjectiveFunctionFree(self._handle)) self._handle = None return self - - def __call__(self, y_pred: np.ndarray) -> Tuple[np.ndarray, np.ndarray]: - if self._handle is None: - raise ValueError("Objective function seems uninitialized") - - if self.num_data is None or self.num_class is None: - raise ValueError("ObjectiveFunction was not created properly") - - data_shape = self.num_data * self.num_class - grad = np.zeros(dtype=np.float32, shape=data_shape) - hess = np.zeros(dtype=np.float32, shape=data_shape) - - _safe_call( - _LIB.LGBM_ObjectiveFunctionEval( - self._handle, - y_pred.ctypes.data_as(ctypes.POINTER(ctypes.c_double)), - grad.ctypes.data_as(ctypes.POINTER(ctypes.c_float)), - hess.ctypes.data_as(ctypes.POINTER(ctypes.c_float)), - ) - ) - - return (grad, hess) diff --git a/src/c_api.cpp b/src/c_api.cpp index cefa17c3dc36..d6ffcf985da1 100644 --- a/src/c_api.cpp +++ b/src/c_api.cpp @@ -2763,8 +2763,8 @@ 
LIGHTGBM_C_EXPORT int LGBM_ObjectiveFunctionCreate(const char *typ, } LIGHTGBM_C_EXPORT int LGBM_ObjectiveFunctionInit(ObjectiveFunctionHandle handle, - int *num_data, - DatasetHandle dataset) { + DatasetHandle dataset, + int *num_data) { API_BEGIN(); ObjectiveFunction* ref_fobj = reinterpret_cast(handle); Dataset* ref_dataset = reinterpret_cast(dataset); diff --git a/tests/python_package_test/test_engine.py b/tests/python_package_test/test_engine.py index 90e0305bb23b..fbb39fe7e4ae 100644 --- a/tests/python_package_test/test_engine.py +++ b/tests/python_package_test/test_engine.py @@ -4409,7 +4409,7 @@ def test_quantized_training(): }, ]) @pytest.mark.parametrize("num_boost_round", [5, 15]) -def test_objective_function_multiclass(use_weight, test_data, num_boost_round): +def test_objective_function_class(use_weight, test_data, num_boost_round): X, y = test_data["df"] weight = np.random.choice([1, 2], y.shape) if use_weight else None lgb_train = lgb.Dataset(X, y, weight=weight, init_score=np.zeros((len(y), test_data["num_class"]))) From 58e400378781926ad5b24c1b4d9bd2da5a41e28c Mon Sep 17 00:00:00 2001 From: Atanas Dimitrov Date: Thu, 1 Aug 2024 21:12:18 +0300 Subject: [PATCH 05/17] Fix debug prints --- src/c_api.cpp | 6 +----- src/objective/regression_objective.hpp | 1 - tests/python_package_test/test_engine.py | 2 +- 3 files changed, 2 insertions(+), 7 deletions(-) diff --git a/src/c_api.cpp b/src/c_api.cpp index d6ffcf985da1..9f701f90610c 100644 --- a/src/c_api.cpp +++ b/src/c_api.cpp @@ -27,7 +27,6 @@ #include #include #include -#include #include "application/predictor.hpp" #include @@ -44,8 +43,7 @@ inline int LGBM_APIHandleException(const std::string& ex) { return -1; } -#define API_BEGIN() std::ofstream outf("logs.txt", std::ios_base::app); \ - try { +#define API_BEGIN() try { #define API_END() } \ catch(std::exception& ex) { return LGBM_APIHandleException(ex); } \ catch(std::string& ex) { return LGBM_APIHandleException(ex); } \ @@ -2590,7 +2588,6 @@ int 
LGBM_BoosterPredictForMats(BoosterHandle handle, int64_t* out_len, double* out_result) { API_BEGIN(); - outf << parameter << std::endl; auto param = Config::Str2Map(parameter); Config config; config.Set(param); @@ -2758,7 +2755,6 @@ LIGHTGBM_C_EXPORT int LGBM_ObjectiveFunctionCreate(const char *typ, auto param = Config::Str2Map(parameter); Config config(param); *out = ObjectiveFunction::CreateObjectiveFunction(std::string(typ), config); - outf << parameter << std::endl; API_END(); } diff --git a/src/objective/regression_objective.hpp b/src/objective/regression_objective.hpp index 1039b8b26534..4f53319bbc49 100644 --- a/src/objective/regression_objective.hpp +++ b/src/objective/regression_objective.hpp @@ -111,7 +111,6 @@ class RegressionL2loss: public ObjectiveFunction { } void Init(const Metadata& metadata, data_size_t num_data) override { - Log::Debug("We are here"); num_data_ = num_data; label_ = metadata.label(); if (sqrt_) { diff --git a/tests/python_package_test/test_engine.py b/tests/python_package_test/test_engine.py index fbb39fe7e4ae..47d63fafa914 100644 --- a/tests/python_package_test/test_engine.py +++ b/tests/python_package_test/test_engine.py @@ -2937,7 +2937,7 @@ def test_multiclass_custom_objective(use_weight): builtin_obj_bst = lgb.train(params, ds, num_boost_round=10) builtin_obj_preds = builtin_obj_bst.predict(X) - params["objective"] = custom_obj + params["objective"] = multiclass_custom_objective custom_obj_bst = lgb.train(params, ds, num_boost_round=10) custom_obj_preds = softmax(custom_obj_bst.predict(X)) From caa5a499756ac7819c8f3b96c4cc6276c373d30b Mon Sep 17 00:00:00 2001 From: Atanas Dimitrov Date: Thu, 1 Aug 2024 21:28:31 +0300 Subject: [PATCH 06/17] Run pre-commit hooks --- python-package/lightgbm/__init__.py | 2 +- python-package/lightgbm/basic.py | 3 +- tests/python_package_test/test_engine.py | 37 ++++++++++++++---------- tests/python_package_test/utils.py | 5 ++-- 4 files changed, 26 insertions(+), 21 deletions(-) diff --git 
a/python-package/lightgbm/__init__.py b/python-package/lightgbm/__init__.py index de9ec5fc2232..06ddc8ea5539 100644 --- a/python-package/lightgbm/__init__.py +++ b/python-package/lightgbm/__init__.py @@ -6,7 +6,7 @@ from pathlib import Path -from .basic import Booster, Dataset, Sequence, ObjectiveFunction, register_logger +from .basic import Booster, Dataset, ObjectiveFunction, Sequence, register_logger from .callback import EarlyStopException, early_stopping, log_evaluation, record_evaluation, reset_parameter from .engine import CVBooster, cv, train diff --git a/python-package/lightgbm/basic.py b/python-package/lightgbm/basic.py index 1ed0b166e5fe..42aee7e82173 100644 --- a/python-package/lightgbm/basic.py +++ b/python-package/lightgbm/basic.py @@ -5311,7 +5311,7 @@ def __init__(self, name: str, params: Dict[str, Any]): self.num_class = params.get("num_class", 1) if "objective" in params and params["objective"] != self.name: - raise ValueError("The name should be consistent with the params[\"objective\"] field.") + raise ValueError('The name should be consistent with the params["objective"] field.') self.__create() @@ -5383,7 +5383,6 @@ def __init_from_dataset(self, dataset: Dataset) -> "ObjectiveFunction": if self._handle is None: raise ValueError("Dealocated ObjectiveFunction cannot be initialized") - ref_dataset = dataset._handle tmp_num_data = ctypes.c_int(0) _safe_call( _LIB.LGBM_ObjectiveFunctionInit( diff --git a/tests/python_package_test/test_engine.py b/tests/python_package_test/test_engine.py index 47d63fafa914..abcba7679204 100644 --- a/tests/python_package_test/test_engine.py +++ b/tests/python_package_test/test_engine.py @@ -32,8 +32,8 @@ logistic_sigmoid, make_synthetic_regression, mse_obj, - pickle_and_unpickle_object, multiclass_custom_objective, + pickle_and_unpickle_object, softmax, ) @@ -4393,25 +4393,30 @@ def test_quantized_training(): quant_rmse = np.sqrt(np.mean((quant_bst.predict(X) - y) ** 2)) assert quant_rmse < rmse + 6.0 + 
@pytest.mark.parametrize("use_weight", [False, True]) -@pytest.mark.parametrize("test_data", [ - { - "custom_objective": mse_obj, - "objective_name": "regression", - "df": make_synthetic_regression(), - "num_class": 1 - }, - { - "custom_objective": multiclass_custom_objective, - "objective_name": "multiclass", - "df": make_blobs(n_samples=100, centers=[[-4, -4], [4, 4], [-4, 4]], random_state=42), - "num_class": 3 - }, -]) +@pytest.mark.parametrize( + "test_data", + [ + { + "custom_objective": mse_obj, + "objective_name": "regression", + "df": make_synthetic_regression(), + "num_class": 1, + }, + { + "custom_objective": multiclass_custom_objective, + "objective_name": "multiclass", + "df": make_blobs(n_samples=100, centers=[[-4, -4], [4, 4], [-4, 4]], random_state=42), + "num_class": 3, + }, + ], +) @pytest.mark.parametrize("num_boost_round", [5, 15]) def test_objective_function_class(use_weight, test_data, num_boost_round): X, y = test_data["df"] - weight = np.random.choice([1, 2], y.shape) if use_weight else None + rng = np.random.default_rng() + weight = rng.choice([1, 2], y.shape) if use_weight else None lgb_train = lgb.Dataset(X, y, weight=weight, init_score=np.zeros((len(y), test_data["num_class"]))) params = {"verbose": -1, "objective": test_data["objective_name"], "num_class": test_data["num_class"]} diff --git a/tests/python_package_test/utils.py b/tests/python_package_test/utils.py index bd9f1d82c932..5a78683bf865 100644 --- a/tests/python_package_test/utils.py +++ b/tests/python_package_test/utils.py @@ -169,8 +169,8 @@ def multiclass_custom_objective(y_pred, ds): def builtin_objective(name, params): - """Mimics the builtin objective functions to mock training. 
- """ + """Mimics the builtin objective functions to mock training.""" + def wrapper(y_pred, dtrain): fobj = lgb.ObjectiveFunction(name, params) fobj.init(dtrain) @@ -181,6 +181,7 @@ def wrapper(y_pred, dtrain): hess = hess.reshape((fobj.num_class, -1)).transpose() print(grad, hess) return (grad, hess) + return wrapper From 5d505d1ebe5b9b37c1295c7c9fcd366883ea2179 Mon Sep 17 00:00:00 2001 From: Atanas Dimitrov Date: Thu, 1 Aug 2024 21:31:29 +0300 Subject: [PATCH 07/17] Remove test prints --- tests/python_package_test/utils.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/tests/python_package_test/utils.py b/tests/python_package_test/utils.py index 5a78683bf865..4f1e0f240f24 100644 --- a/tests/python_package_test/utils.py +++ b/tests/python_package_test/utils.py @@ -175,11 +175,9 @@ def wrapper(y_pred, dtrain): fobj = lgb.ObjectiveFunction(name, params) fobj.init(dtrain) (grad, hess) = fobj(y_pred) - print(grad, hess) if fobj.num_class != 1: grad = grad.reshape((fobj.num_class, -1)).transpose() hess = hess.reshape((fobj.num_class, -1)).transpose() - print(grad, hess) return (grad, hess) return wrapper From c1485d3f15e943ec05d597391019913654d07046 Mon Sep 17 00:00:00 2001 From: Atanas Dimitrov Date: Thu, 1 Aug 2024 23:12:15 +0300 Subject: [PATCH 08/17] Add an error when not using CPU --- src/c_api.cpp | 6 +++++- tests/python_package_test/test_engine.py | 6 ++++++ 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/src/c_api.cpp b/src/c_api.cpp index 9f701f90610c..883c41934903 100644 --- a/src/c_api.cpp +++ b/src/c_api.cpp @@ -2754,7 +2754,11 @@ LIGHTGBM_C_EXPORT int LGBM_ObjectiveFunctionCreate(const char *typ, API_BEGIN(); auto param = Config::Str2Map(parameter); Config config(param); - *out = ObjectiveFunction::CreateObjectiveFunction(std::string(typ), config); + if (config.device_type != std::string("cpu")) { + Log::Fatal("Currently the ObjectiveFunction class is only exposed for CPU devices."); + } else { + *out = 
ObjectiveFunction::CreateObjectiveFunction(std::string(typ), config); + } API_END(); } diff --git a/tests/python_package_test/test_engine.py b/tests/python_package_test/test_engine.py index abcba7679204..7cbeb5792469 100644 --- a/tests/python_package_test/test_engine.py +++ b/tests/python_package_test/test_engine.py @@ -4424,8 +4424,14 @@ def test_objective_function_class(use_weight, test_data, num_boost_round): params["objective"] = builtin_loss booster_exposed = lgb.train(params, lgb_train, num_boost_round=num_boost_round) + + if getenv("TASK", "") != "cpu": + with pytest.raises(lgb.basic.LightGBMError): + builtin_loss(y, lgb_train) + return params["objective"] = test_data["objective_name"] booster = lgb.train(params, lgb_train, num_boost_round=num_boost_round) + params["objective"] = test_data["custom_objective"] booster_custom = lgb.train(params, lgb_train, num_boost_round=num_boost_round) From b246f606ebeacd22f77a2294b0baca26d103e9d3 Mon Sep 17 00:00:00 2001 From: Atanas Dimitrov Date: Thu, 1 Aug 2024 23:51:19 +0300 Subject: [PATCH 09/17] Skip cuda testing --- tests/python_package_test/test_engine.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/tests/python_package_test/test_engine.py b/tests/python_package_test/test_engine.py index 7cbeb5792469..093648c11bab 100644 --- a/tests/python_package_test/test_engine.py +++ b/tests/python_package_test/test_engine.py @@ -4413,22 +4413,24 @@ def test_quantized_training(): ], ) @pytest.mark.parametrize("num_boost_round", [5, 15]) +@pytest.mark.skipif(getenv("TASK", "") == "cuda", reason="Skip due to ObjectiveFunction not exposed for cuda devices.") def test_objective_function_class(use_weight, test_data, num_boost_round): X, y = test_data["df"] rng = np.random.default_rng() weight = rng.choice([1, 2], y.shape) if use_weight else None lgb_train = lgb.Dataset(X, y, weight=weight, init_score=np.zeros((len(y), test_data["num_class"]))) - params = {"verbose": -1, "objective": 
test_data["objective_name"], "num_class": test_data["num_class"]} + params = { + "verbose": -1, + "objective": test_data["objective_name"], + "num_class": test_data["num_class"], + "device": "cpu", + } builtin_loss = builtin_objective(test_data["objective_name"], copy.deepcopy(params)) params["objective"] = builtin_loss booster_exposed = lgb.train(params, lgb_train, num_boost_round=num_boost_round) - if getenv("TASK", "") != "cpu": - with pytest.raises(lgb.basic.LightGBMError): - builtin_loss(y, lgb_train) - return params["objective"] = test_data["objective_name"] booster = lgb.train(params, lgb_train, num_boost_round=num_boost_round) From 698850da79b58a5b7d811dd61c9db3e71bf1a6ee Mon Sep 17 00:00:00 2001 From: Atanas Dimitrov Date: Thu, 8 Aug 2024 18:01:48 +0300 Subject: [PATCH 10/17] Expose convert_outputs as well --- include/LightGBM/c_api.h | 20 +++++++++--- include/LightGBM/objective_function.h | 6 ++++ python-package/lightgbm/basic.py | 39 ++++++++++++++++++++++-- src/c_api.cpp | 26 +++++++++++++--- src/objective/multiclass_objective.hpp | 12 ++++++++ tests/python_package_test/test_engine.py | 6 ++++ tests/python_package_test/utils.py | 2 +- 7 files changed, 100 insertions(+), 11 deletions(-) diff --git a/include/LightGBM/c_api.h b/include/LightGBM/c_api.h index 872a3ec355b0..0152c22a00ee 100644 --- a/include/LightGBM/c_api.h +++ b/include/LightGBM/c_api.h @@ -1594,10 +1594,10 @@ LIGHTGBM_C_EXPORT int LGBM_ObjectiveFunctionInit(ObjectiveFunctionHandle handle, * \param[out] hess Hessian result array * \return 0 when succeed, -1 when failure happens */ -LIGHTGBM_C_EXPORT int LGBM_ObjectiveFunctionEval(ObjectiveFunctionHandle handle, - const double* score, - float* grad, - float* hess); +LIGHTGBM_C_EXPORT int LGBM_ObjectiveFunctionGetGradients(ObjectiveFunctionHandle handle, + const double* score, + float* grad, + float* hess); /*! * \brief Free the memory allocated for an objective function. 
@@ -1606,6 +1606,18 @@ LIGHTGBM_C_EXPORT int LGBM_ObjectiveFunctionEval(ObjectiveFunctionHandle handle, */ LIGHTGBM_C_EXPORT int LGBM_ObjectiveFunctionFree(ObjectiveFunctionHandle handle); +/*! + * \brief Convert raw scores to outputs. + * \param handle Handle of the objective function + * \param num_data Number of data points + * \param inputs Array of raw scores + * \param[out] outputs Array of outputs + */ +LIGHTGBM_C_EXPORT int LGBM_ObjectiveFunctionConvertOutputs(ObjectiveFunctionHandle handle, + const int num_data, + const double* inputs, + double* outputs); + /*! * \brief Initialize the network. * \param machines List of machines in format 'ip1:port1,ip2:port2' diff --git a/include/LightGBM/objective_function.h b/include/LightGBM/objective_function.h index ad188dc39676..ae09a550ea5f 100644 --- a/include/LightGBM/objective_function.h +++ b/include/LightGBM/objective_function.h @@ -67,6 +67,12 @@ class ObjectiveFunction { /*! \brief Return the number of positive samples. Return 0 if no binary classification tasks.*/ virtual data_size_t NumPositiveData() const { return 0; } + virtual void ConvertOutputs(const int num_data, const double* inputs, double* outputs) const { + for (int i = 0; i < num_data; i ++) { + ConvertOutput(inputs + i, outputs + i); + } + } + virtual void ConvertOutput(const double* input, double* output) const { output[0] = input[0]; } diff --git a/python-package/lightgbm/basic.py b/python-package/lightgbm/basic.py index 42aee7e82173..cd76ccd594cf 100644 --- a/python-package/lightgbm/basic.py +++ b/python-package/lightgbm/basic.py @@ -5331,7 +5331,42 @@ def init(self, dataset: Dataset) -> "ObjectiveFunction": """ return self.__init_from_dataset(dataset) - def __call__(self, y_pred: np.ndarray) -> Tuple[np.ndarray, np.ndarray]: + def convert_outputs(self, scores: np.ndarray) -> np.ndarray: + """ + Convert the raw scores to the final predictions. + + Parameters + ---------- + scores : numpy.ndarray + Raw scores from the model. 
+ + Returns + ------- + result : numpy.ndarray + """ + if self._handle is None: + raise ValueError("Objective function seems uninitialized") + + if self.num_class == 1: + scores = _list_to_1d_numpy(scores, dtype=np.float64, name="scores") + else: + scores = _data_to_2d_numpy(scores, dtype=np.float64, name="scores") + + num_data = scores.size + out_preds = np.zeros_like(scores, dtype=np.float64) + + _safe_call( + _LIB.LGBM_ObjectiveFunctionConvertOutputs( + self._handle, + ctypes.c_int(num_data), + scores.ctypes.data_as(ctypes.POINTER(ctypes.c_double)), + out_preds.ctypes.data_as(ctypes.POINTER(ctypes.c_double)), + ) + ) + + return out_preds + + def get_gradients(self, y_pred: np.ndarray) -> Tuple[np.ndarray, np.ndarray]: """ Evaluate the objective function given model predictions. @@ -5356,7 +5391,7 @@ def __call__(self, y_pred: np.ndarray) -> Tuple[np.ndarray, np.ndarray]: hess = np.zeros(dtype=np.float32, shape=data_shape) _safe_call( - _LIB.LGBM_ObjectiveFunctionEval( + _LIB.LGBM_ObjectiveFunctionGetGradients( self._handle, y_pred.ctypes.data_as(ctypes.POINTER(ctypes.c_double)), grad.ctypes.data_as(ctypes.POINTER(ctypes.c_float)), diff --git a/src/c_api.cpp b/src/c_api.cpp index 883c41934903..5ad3deea43a6 100644 --- a/src/c_api.cpp +++ b/src/c_api.cpp @@ -2773,10 +2773,10 @@ LIGHTGBM_C_EXPORT int LGBM_ObjectiveFunctionInit(ObjectiveFunctionHandle handle, API_END(); } -LIGHTGBM_C_EXPORT int LGBM_ObjectiveFunctionEval(ObjectiveFunctionHandle handle, - const double* score, - float* grad, - float* hess) { +LIGHTGBM_C_EXPORT int LGBM_ObjectiveFunctionGetGradients(ObjectiveFunctionHandle handle, + const double* score, + float* grad, + float* hess) { API_BEGIN(); #ifdef SCORE_T_USE_DOUBLE (void) handle; // UNUSED VARIABLE @@ -2790,6 +2790,24 @@ LIGHTGBM_C_EXPORT int LGBM_ObjectiveFunctionEval(ObjectiveFunctionHandle handle, API_END(); } +LIGHTGBM_C_EXPORT int LGBM_ObjectiveFunctionConvertOutputs(ObjectiveFunctionHandle handle, + const int num_data, + const double* 
inputs, + double* outputs) { + API_BEGIN(); + #ifdef SCORE_T_USE_DOUBLE + (void) handle; // UNUSED VARIABLE + (void) num_data; // UNUSED VARIABLE + (void) inputs; // UNUSED VARIABLE + (void) outputs; // UNUSED VARIABLE + Log::Fatal("Don't support evaluating objective function when SCORE_T_USE_DOUBLE is enabled"); + #else + ObjectiveFunction* ref_fobj = reinterpret_cast(handle); + ref_fobj->ConvertOutputs(num_data, inputs, outputs); + #endif + API_END(); +} + /*! */ LIGHTGBM_C_EXPORT int LGBM_ObjectiveFunctionFree(ObjectiveFunctionHandle handle) { diff --git a/src/objective/multiclass_objective.hpp b/src/objective/multiclass_objective.hpp index b2a49a9a40ca..3d161cc8864f 100644 --- a/src/objective/multiclass_objective.hpp +++ b/src/objective/multiclass_objective.hpp @@ -129,6 +129,12 @@ class MulticlassSoftmax: public ObjectiveFunction { } } + void ConvertOutputs(const int num_data, const double* inputs, double* outputs) const override { + for (int i = 0; i < num_data; i += num_class_) { + ConvertOutput(inputs + i, outputs + i); + } + } + void ConvertOutput(const double* input, double* output) const override { Common::Softmax(input, output, num_class_); } @@ -236,6 +242,12 @@ class MulticlassOVA: public ObjectiveFunction { return "multiclassova"; } + void ConvertOutputs(const int num_data, const double* inputs, double* outputs) const override { + for (int i = 0; i < num_data; i += num_class_) { + ConvertOutput(inputs + i, outputs + i); + } + } + void ConvertOutput(const double* input, double* output) const override { for (int i = 0; i < num_class_; ++i) { output[i] = 1.0f / (1.0f + std::exp(-sigmoid_ * input[i])); diff --git a/tests/python_package_test/test_engine.py b/tests/python_package_test/test_engine.py index 093648c11bab..bb38ba541a82 100644 --- a/tests/python_package_test/test_engine.py +++ b/tests/python_package_test/test_engine.py @@ -4427,6 +4427,7 @@ def test_objective_function_class(use_weight, test_data, num_boost_round): "device": "cpu", } 
builtin_loss = builtin_objective(test_data["objective_name"], copy.deepcopy(params)) + builtin_convert_outputs = lgb.ObjectiveFunction(test_data["objective_name"], copy.deepcopy(params)).convert_outputs params["objective"] = builtin_loss booster_exposed = lgb.train(params, lgb_train, num_boost_round=num_boost_round) @@ -4442,3 +4443,8 @@ def test_objective_function_class(use_weight, test_data, num_boost_round): y_pred = np.zeros_like(booster.predict(X, raw_score=True)) np.testing.assert_allclose(builtin_loss(y_pred, lgb_train), test_data["custom_objective"](y_pred, lgb_train)) + + np.testing.assert_allclose( + builtin_convert_outputs(booster_exposed.predict(X)), + booster.predict(X) + ) diff --git a/tests/python_package_test/utils.py b/tests/python_package_test/utils.py index 4f1e0f240f24..7cd0617642fd 100644 --- a/tests/python_package_test/utils.py +++ b/tests/python_package_test/utils.py @@ -174,7 +174,7 @@ def builtin_objective(name, params): def wrapper(y_pred, dtrain): fobj = lgb.ObjectiveFunction(name, params) fobj.init(dtrain) - (grad, hess) = fobj(y_pred) + (grad, hess) = fobj.get_gradients(y_pred) if fobj.num_class != 1: grad = grad.reshape((fobj.num_class, -1)).transpose() hess = hess.reshape((fobj.num_class, -1)).transpose() From 2ab26f395f133ac24a41601b0954819776e87fe6 Mon Sep 17 00:00:00 2001 From: Atanas Dimitrov Date: Thu, 8 Aug 2024 18:09:57 +0300 Subject: [PATCH 11/17] Appease linter --- tests/python_package_test/test_engine.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/tests/python_package_test/test_engine.py b/tests/python_package_test/test_engine.py index 617da3d9b689..d629080d12b2 100644 --- a/tests/python_package_test/test_engine.py +++ b/tests/python_package_test/test_engine.py @@ -4445,7 +4445,4 @@ def test_objective_function_class(use_weight, test_data, num_boost_round): y_pred = np.zeros_like(booster.predict(X, raw_score=True)) np.testing.assert_allclose(builtin_loss(y_pred, lgb_train), 
test_data["custom_objective"](y_pred, lgb_train)) - np.testing.assert_allclose( - builtin_convert_outputs(booster_exposed.predict(X)), - booster.predict(X) - ) + np.testing.assert_allclose(builtin_convert_outputs(booster_exposed.predict(X)), booster.predict(X)) From 4d296cf9a4af4e9b797728c4e3a68b5ed38c58dc Mon Sep 17 00:00:00 2001 From: Atanas Dimitrov Date: Fri, 16 Aug 2024 18:17:26 +0300 Subject: [PATCH 12/17] Refactor tests --- tests/python_package_test/test_engine.py | 43 +++++++++++------------- 1 file changed, 19 insertions(+), 24 deletions(-) diff --git a/tests/python_package_test/test_engine.py b/tests/python_package_test/test_engine.py index d629080d12b2..b7ca2831ef72 100644 --- a/tests/python_package_test/test_engine.py +++ b/tests/python_package_test/test_engine.py @@ -4396,53 +4396,48 @@ def test_quantized_training(): @pytest.mark.parametrize("use_weight", [False, True]) +@pytest.mark.parametrize("num_boost_round", [5, 15]) @pytest.mark.parametrize( - "test_data", + "custom_objective, objective_name, df, num_class", [ - { - "custom_objective": mse_obj, - "objective_name": "regression", - "df": make_synthetic_regression(), - "num_class": 1, - }, - { - "custom_objective": multiclass_custom_objective, - "objective_name": "multiclass", - "df": make_blobs(n_samples=100, centers=[[-4, -4], [4, 4], [-4, 4]], random_state=42), - "num_class": 3, - }, + (mse_obj, "regression", make_synthetic_regression(), 1), + ( + multiclass_custom_objective, + "multiclass", + make_blobs(n_samples=100, centers=[[-4, -4], [4, 4], [-4, 4]], random_state=42), + 3, + ), ], ) -@pytest.mark.parametrize("num_boost_round", [5, 15]) @pytest.mark.skipif(getenv("TASK", "") == "cuda", reason="Skip due to ObjectiveFunction not exposed for cuda devices.") -def test_objective_function_class(use_weight, test_data, num_boost_round): - X, y = test_data["df"] +def test_objective_function_class(use_weight, num_boost_round, custom_objective, objective_name, df, num_class): + X, y = df rng = 
np.random.default_rng() weight = rng.choice([1, 2], y.shape) if use_weight else None - lgb_train = lgb.Dataset(X, y, weight=weight, init_score=np.zeros((len(y), test_data["num_class"]))) + lgb_train = lgb.Dataset(X, y, weight=weight, init_score=np.zeros((len(y), num_class))) params = { "verbose": -1, - "objective": test_data["objective_name"], - "num_class": test_data["num_class"], + "objective": objective_name, + "num_class": num_class, "device": "cpu", } - builtin_loss = builtin_objective(test_data["objective_name"], copy.deepcopy(params)) - builtin_convert_outputs = lgb.ObjectiveFunction(test_data["objective_name"], copy.deepcopy(params)).convert_outputs + builtin_loss = builtin_objective(objective_name, copy.deepcopy(params)) + builtin_convert_outputs = lgb.ObjectiveFunction(objective_name, copy.deepcopy(params)).convert_outputs params["objective"] = builtin_loss booster_exposed = lgb.train(params, lgb_train, num_boost_round=num_boost_round) - params["objective"] = test_data["objective_name"] + params["objective"] = objective_name booster = lgb.train(params, lgb_train, num_boost_round=num_boost_round) - params["objective"] = test_data["custom_objective"] + params["objective"] = custom_objective booster_custom = lgb.train(params, lgb_train, num_boost_round=num_boost_round) np.testing.assert_allclose(booster_exposed.predict(X), booster.predict(X, raw_score=True)) np.testing.assert_allclose(booster_exposed.predict(X), booster_custom.predict(X)) y_pred = np.zeros_like(booster.predict(X, raw_score=True)) - np.testing.assert_allclose(builtin_loss(y_pred, lgb_train), test_data["custom_objective"](y_pred, lgb_train)) + np.testing.assert_allclose(builtin_loss(y_pred, lgb_train), custom_objective(y_pred, lgb_train)) np.testing.assert_allclose(builtin_convert_outputs(booster_exposed.predict(X)), booster.predict(X)) From e52568d4bd1c46b1e7f348fd4d956317b631d3e0 Mon Sep 17 00:00:00 2001 From: Atanas Dimitrov Date: Mon, 2 Sep 2024 15:35:04 +0300 Subject: [PATCH 13/17] Use 
empty, rename --- include/LightGBM/objective_function.h | 1 + python-package/lightgbm/basic.py | 8 ++++---- src/objective/multiclass_objective.hpp | 2 ++ tests/python_package_test/test_engine.py | 4 ++-- 4 files changed, 9 insertions(+), 6 deletions(-) diff --git a/include/LightGBM/objective_function.h b/include/LightGBM/objective_function.h index ae09a550ea5f..c9d6f2958abd 100644 --- a/include/LightGBM/objective_function.h +++ b/include/LightGBM/objective_function.h @@ -68,6 +68,7 @@ class ObjectiveFunction { virtual data_size_t NumPositiveData() const { return 0; } virtual void ConvertOutputs(const int num_data, const double* inputs, double* outputs) const { + #pragma omp parallel for num_threads(OMP_NUM_THREADS()) schedule(static) for (int i = 0; i < num_data; i ++) { ConvertOutput(inputs + i, outputs + i); } diff --git a/python-package/lightgbm/basic.py b/python-package/lightgbm/basic.py index cd76ccd594cf..3b6b323acf9e 100644 --- a/python-package/lightgbm/basic.py +++ b/python-package/lightgbm/basic.py @@ -5331,7 +5331,7 @@ def init(self, dataset: Dataset) -> "ObjectiveFunction": """ return self.__init_from_dataset(dataset) - def convert_outputs(self, scores: np.ndarray) -> np.ndarray: + def convert_raw_scores(self, scores: np.ndarray) -> np.ndarray: """ Convert the raw scores to the final predictions. 
@@ -5353,7 +5353,7 @@ def convert_outputs(self, scores: np.ndarray) -> np.ndarray: scores = _data_to_2d_numpy(scores, dtype=np.float64, name="scores") num_data = scores.size - out_preds = np.zeros_like(scores, dtype=np.float64) + out_preds = np.empty_like(scores, dtype=np.float64) _safe_call( _LIB.LGBM_ObjectiveFunctionConvertOutputs( @@ -5387,8 +5387,8 @@ def get_gradients(self, y_pred: np.ndarray) -> Tuple[np.ndarray, np.ndarray]: raise ValueError("ObjectiveFunction was not created properly") data_shape = self.num_data * self.num_class - grad = np.zeros(dtype=np.float32, shape=data_shape) - hess = np.zeros(dtype=np.float32, shape=data_shape) + grad = np.empty(dtype=np.float32, shape=data_shape) + hess = np.empty(dtype=np.float32, shape=data_shape) _safe_call( _LIB.LGBM_ObjectiveFunctionGetGradients( diff --git a/src/objective/multiclass_objective.hpp b/src/objective/multiclass_objective.hpp index 3d161cc8864f..cd909858de41 100644 --- a/src/objective/multiclass_objective.hpp +++ b/src/objective/multiclass_objective.hpp @@ -130,6 +130,7 @@ class MulticlassSoftmax: public ObjectiveFunction { } void ConvertOutputs(const int num_data, const double* inputs, double* outputs) const override { + #pragma omp parallel for num_threads(OMP_NUM_THREADS()) schedule(static) for (int i = 0; i < num_data; i += num_class_) { ConvertOutput(inputs + i, outputs + i); } @@ -243,6 +244,7 @@ class MulticlassOVA: public ObjectiveFunction { } void ConvertOutputs(const int num_data, const double* inputs, double* outputs) const override { + #pragma omp parallel for num_threads(OMP_NUM_THREADS()) schedule(static) for (int i = 0; i < num_data; i += num_class_) { ConvertOutput(inputs + i, outputs + i); } diff --git a/tests/python_package_test/test_engine.py b/tests/python_package_test/test_engine.py index b7ca2831ef72..dc1cf13f6f47 100644 --- a/tests/python_package_test/test_engine.py +++ b/tests/python_package_test/test_engine.py @@ -4423,7 +4423,7 @@ def 
test_objective_function_class(use_weight, num_boost_round, custom_objective, "device": "cpu", } builtin_loss = builtin_objective(objective_name, copy.deepcopy(params)) - builtin_convert_outputs = lgb.ObjectiveFunction(objective_name, copy.deepcopy(params)).convert_outputs + builtin_convert_scores = lgb.ObjectiveFunction(objective_name, copy.deepcopy(params)).convert_raw_scores params["objective"] = builtin_loss booster_exposed = lgb.train(params, lgb_train, num_boost_round=num_boost_round) @@ -4440,4 +4440,4 @@ def test_objective_function_class(use_weight, num_boost_round, custom_objective, y_pred = np.zeros_like(booster.predict(X, raw_score=True)) np.testing.assert_allclose(builtin_loss(y_pred, lgb_train), custom_objective(y_pred, lgb_train)) - np.testing.assert_allclose(builtin_convert_outputs(booster_exposed.predict(X)), booster.predict(X)) + np.testing.assert_allclose(builtin_convert_scores(booster_exposed.predict(X)), booster.predict(X)) From dd8b6924c9c0fb94f5a7f14ce4ac63a4183c76f4 Mon Sep 17 00:00:00 2001 From: Atanas Dimitrov Date: Mon, 2 Sep 2024 15:41:45 +0300 Subject: [PATCH 14/17] Make test single initialization --- tests/python_package_test/test_engine.py | 15 ++++++++++++++- tests/python_package_test/utils.py | 15 --------------- 2 files changed, 14 insertions(+), 16 deletions(-) diff --git a/tests/python_package_test/test_engine.py b/tests/python_package_test/test_engine.py index dc1cf13f6f47..cb8ba6143931 100644 --- a/tests/python_package_test/test_engine.py +++ b/tests/python_package_test/test_engine.py @@ -24,7 +24,6 @@ from .utils import ( SERIALIZERS, - builtin_objective, dummy_obj, load_breast_cancer, load_digits, @@ -4411,6 +4410,20 @@ def test_quantized_training(): ) @pytest.mark.skipif(getenv("TASK", "") == "cuda", reason="Skip due to ObjectiveFunction not exposed for cuda devices.") def test_objective_function_class(use_weight, num_boost_round, custom_objective, objective_name, df, num_class): + def builtin_objective(name, params): + fobj = 
lgb.ObjectiveFunction(name, params) + + def loss(y_pred, dtrain): + if fobj.num_data is None: + fobj.init(dtrain) + (grad, hess) = fobj.get_gradients(y_pred) + if fobj.num_class != 1: + grad = grad.reshape((fobj.num_class, -1)).transpose() + hess = hess.reshape((fobj.num_class, -1)).transpose() + return (grad, hess) + + return loss + X, y = df rng = np.random.default_rng() weight = rng.choice([1, 2], y.shape) if use_weight else None diff --git a/tests/python_package_test/utils.py b/tests/python_package_test/utils.py index 7cd0617642fd..fa57a21a1876 100644 --- a/tests/python_package_test/utils.py +++ b/tests/python_package_test/utils.py @@ -168,21 +168,6 @@ def multiclass_custom_objective(y_pred, ds): return grad, hess -def builtin_objective(name, params): - """Mimics the builtin objective functions to mock training.""" - - def wrapper(y_pred, dtrain): - fobj = lgb.ObjectiveFunction(name, params) - fobj.init(dtrain) - (grad, hess) = fobj.get_gradients(y_pred) - if fobj.num_class != 1: - grad = grad.reshape((fobj.num_class, -1)).transpose() - hess = hess.reshape((fobj.num_class, -1)).transpose() - return (grad, hess) - - return wrapper - - def pickle_obj(obj, filepath, serializer): if serializer == "pickle": with open(filepath, "wb") as f: From 847a34c0aad44baf1be08a12599bfdec5e0c623e Mon Sep 17 00:00:00 2001 From: Atanas Dimitrov Date: Mon, 2 Sep 2024 17:41:07 +0300 Subject: [PATCH 15/17] Fix tests --- include/LightGBM/objective_function.h | 2 +- python-package/lightgbm/basic.py | 7 +++++++ src/objective/multiclass_objective.hpp | 4 ++-- tests/python_package_test/test_engine.py | 2 +- 4 files changed, 11 insertions(+), 4 deletions(-) diff --git a/include/LightGBM/objective_function.h b/include/LightGBM/objective_function.h index c9d6f2958abd..4b20c8c91794 100644 --- a/include/LightGBM/objective_function.h +++ b/include/LightGBM/objective_function.h @@ -70,7 +70,7 @@ class ObjectiveFunction { virtual void ConvertOutputs(const int num_data, const double* inputs, 
double* outputs) const { #pragma omp parallel for num_threads(OMP_NUM_THREADS()) schedule(static) for (int i = 0; i < num_data; i ++) { - ConvertOutput(inputs + i, outputs + i); + ConvertOutput(&inputs[i], &outputs[i]); } } diff --git a/python-package/lightgbm/basic.py b/python-package/lightgbm/basic.py index 3b6b323acf9e..1c8930ab2784 100644 --- a/python-package/lightgbm/basic.py +++ b/python-package/lightgbm/basic.py @@ -5386,7 +5386,14 @@ def get_gradients(self, y_pred: np.ndarray) -> Tuple[np.ndarray, np.ndarray]: if self.num_data is None or self.num_class is None: raise ValueError("ObjectiveFunction was not created properly") + if y_pred.shape[0] != self.num_data: + raise ValueError("Gradients cannot be computed as the number of predictions is wrong") + + if self.num_class != 1 and (y_pred.ndim != 2 or y_pred.shape[1] != self.num_class): + raise ValueError("Multiclass gradient computation should be called with the correct shape") + data_shape = self.num_data * self.num_class + y_pred = np.asfortranarray(y_pred) grad = np.empty(dtype=np.float32, shape=data_shape) hess = np.empty(dtype=np.float32, shape=data_shape) diff --git a/src/objective/multiclass_objective.hpp b/src/objective/multiclass_objective.hpp index cd909858de41..e36a5fb8f708 100644 --- a/src/objective/multiclass_objective.hpp +++ b/src/objective/multiclass_objective.hpp @@ -132,7 +132,7 @@ class MulticlassSoftmax: public ObjectiveFunction { void ConvertOutputs(const int num_data, const double* inputs, double* outputs) const override { #pragma omp parallel for num_threads(OMP_NUM_THREADS()) schedule(static) for (int i = 0; i < num_data; i += num_class_) { - ConvertOutput(inputs + i, outputs + i); + ConvertOutput(&inputs[i], &outputs[i]); } } @@ -246,7 +246,7 @@ class MulticlassOVA: public ObjectiveFunction { void ConvertOutputs(const int num_data, const double* inputs, double* outputs) const override { #pragma omp parallel for num_threads(OMP_NUM_THREADS()) schedule(static) for (int i = 0; i < 
num_data; i += num_class_) { - ConvertOutput(inputs + i, outputs + i); + ConvertOutput(&inputs[i], &outputs[i]); } } diff --git a/tests/python_package_test/test_engine.py b/tests/python_package_test/test_engine.py index cb8ba6143931..8660e34eb73d 100644 --- a/tests/python_package_test/test_engine.py +++ b/tests/python_package_test/test_engine.py @@ -4450,7 +4450,7 @@ def loss(y_pred, dtrain): np.testing.assert_allclose(booster_exposed.predict(X), booster.predict(X, raw_score=True)) np.testing.assert_allclose(booster_exposed.predict(X), booster_custom.predict(X)) - y_pred = np.zeros_like(booster.predict(X, raw_score=True)) + y_pred = booster.predict(X, raw_score=True) np.testing.assert_allclose(builtin_loss(y_pred, lgb_train), custom_objective(y_pred, lgb_train)) np.testing.assert_allclose(builtin_convert_scores(booster_exposed.predict(X)), booster.predict(X)) From d2488c5e6f29a1bfcc7abd48c895681825fa5462 Mon Sep 17 00:00:00 2001 From: Atanas Dimitrov Date: Tue, 3 Sep 2024 12:28:40 +0300 Subject: [PATCH 16/17] Update docstring --- python-package/lightgbm/basic.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/python-package/lightgbm/basic.py b/python-package/lightgbm/basic.py index 1c8930ab2784..785609957802 100644 --- a/python-package/lightgbm/basic.py +++ b/python-package/lightgbm/basic.py @@ -5288,8 +5288,10 @@ class ObjectiveFunction: ObjectiveFunction in LightGBM. This class exposes the builtin objective functions for evaluating gradients and hessians - on external datasets. LightGBM does not use this wrapper during its training as it is - using the underlying C++ class. + on external datasets. This is useful for examining the state of the training(for example in a callback) + in a generic way. + + Note: LightGBM does not use this wrapper during its training as it is using the underlying C++ class. 
""" def __init__(self, name: str, params: Dict[str, Any]): From 038291dc07ecdf4f2deaaa57b82ebc10863b498d Mon Sep 17 00:00:00 2001 From: Atanas Dimitrov Date: Thu, 12 Sep 2024 20:50:20 +0300 Subject: [PATCH 17/17] Add elaborate example for ObjectiveFunction --- .../examine_feat_importances_for_test.ipynb | 257 ++++++++++++++++++ 1 file changed, 257 insertions(+) create mode 100644 examples/python-guide/notebooks/examine_feat_importances_for_test.ipynb diff --git a/examples/python-guide/notebooks/examine_feat_importances_for_test.ipynb b/examples/python-guide/notebooks/examine_feat_importances_for_test.ipynb new file mode 100644 index 000000000000..cb69af96082d --- /dev/null +++ b/examples/python-guide/notebooks/examine_feat_importances_for_test.ipynb @@ -0,0 +1,257 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "e8f2e1a0-2faf-4955-a3e4-6c36b00e1ce8", + "metadata": {}, + "source": [ + "# Examine the feature importances on an external dataset" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "6ad3ef6b-912a-4311-aa77-d6ac30b772d1", + "metadata": {}, + "outputs": [], + "source": [ + "import lightgbm as lgb\n", + "from sklearn import datasets\n", + "from sklearn.model_selection import train_test_split\n", + "import numpy as np" + ] + }, + { + "cell_type": "markdown", + "id": "c4d9236e-cbda-44fb-aed0-414bda510cd2", + "metadata": {}, + "source": [ + "## Helper wrapper around " + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "cebd065f-173a-473c-812c-e855cdcafb62", + "metadata": {}, + "outputs": [], + "source": [ + "def make_regression(noise_columns=0, **kwargs):\n", + " X, y = datasets.make_regression(**kwargs)\n", + " np.random.default_rng(kwargs.get(\"random_state\", 0))\n", + " if noise_columns != 0:\n", + " X = np.append(X, np.random.choice(range(10), (X.shape[0], noise_columns)), axis=1) \n", + " X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)\n", + " return X_train, X_test, 
y_train, y_test" + ] + }, + { + "cell_type": "markdown", + "id": "434d995f-52b6-45c4-b94f-bcb62f137a33", + "metadata": {}, + "source": [ + "## Train the model" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "6fe90ef4-73f2-417a-afca-48981e0eb576", + "metadata": {}, + "outputs": [ + { + "ename": "NameError", + "evalue": "name 'numpy' is not defined", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[10], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m X_train, X_test, y_train, y_test \u001b[38;5;241m=\u001b[39m \u001b[43mmake_regression\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 2\u001b[0m \u001b[43m \u001b[49m\u001b[43mn_samples\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;241;43m3000\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\n\u001b[1;32m 3\u001b[0m \u001b[43m \u001b[49m\u001b[43mnoise\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;241;43m1\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\n\u001b[1;32m 4\u001b[0m \u001b[43m \u001b[49m\u001b[43mn_features\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;241;43m10\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\n\u001b[1;32m 5\u001b[0m \u001b[43m \u001b[49m\u001b[43mnoise_columns\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;241;43m5\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\n\u001b[1;32m 6\u001b[0m \u001b[43m \u001b[49m\u001b[43mrandom_state\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;241;43m1\u001b[39;49m\n\u001b[1;32m 7\u001b[0m \u001b[43m)\u001b[49m\n\u001b[1;32m 9\u001b[0m model \u001b[38;5;241m=\u001b[39m lgb\u001b[38;5;241m.\u001b[39mLGBMRegressor(objective\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mregression\u001b[39m\u001b[38;5;124m\"\u001b[39m, 
verbose\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m-\u001b[39m\u001b[38;5;241m1\u001b[39m)\u001b[38;5;241m.\u001b[39mfit(X_train, y_train)\n\u001b[1;32m 10\u001b[0m lgb\u001b[38;5;241m.\u001b[39mplot_importance(model, importance_type\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mgain\u001b[39m\u001b[38;5;124m'\u001b[39m, max_num_features\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m30\u001b[39m)\n", + "Cell \u001b[0;32mIn[9], line 3\u001b[0m, in \u001b[0;36mmake_regression\u001b[0;34m(noise_columns, **kwargs)\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mmake_regression\u001b[39m(noise_columns\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m0\u001b[39m, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs):\n\u001b[1;32m 2\u001b[0m X, y \u001b[38;5;241m=\u001b[39m datasets\u001b[38;5;241m.\u001b[39mmake_regression(\u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)\n\u001b[0;32m----> 3\u001b[0m \u001b[43mnumpy\u001b[49m\u001b[38;5;241m.\u001b[39mrandom\u001b[38;5;241m.\u001b[39mdefault_rng(kwargs\u001b[38;5;241m.\u001b[39mget(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mrandom_state\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;241m0\u001b[39m))\n\u001b[1;32m 4\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m noise_columns \u001b[38;5;241m!=\u001b[39m \u001b[38;5;241m0\u001b[39m:\n\u001b[1;32m 5\u001b[0m X \u001b[38;5;241m=\u001b[39m np\u001b[38;5;241m.\u001b[39mappend(X, np\u001b[38;5;241m.\u001b[39mrandom\u001b[38;5;241m.\u001b[39mchoice(\u001b[38;5;28mrange\u001b[39m(\u001b[38;5;241m10\u001b[39m), (X\u001b[38;5;241m.\u001b[39mshape[\u001b[38;5;241m0\u001b[39m], noise_columns)), axis\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m1\u001b[39m) \n", + "\u001b[0;31mNameError\u001b[0m: name 'numpy' is not defined" + ] + } + ], + "source": [ + "X_train, X_test, y_train, y_test = make_regression(\n", + " n_samples=3000, \n", + " noise=1, \n", + " n_features=10, \n", + " noise_columns=5, \n", + " 
random_state=1\n", + ")\n", + "\n", + "model = lgb.LGBMRegressor(objective=\"regression\", verbose=-1).fit(X_train, y_train)\n", + "lgb.plot_importance(model, importance_type='gain', max_num_features=30)" + ] + }, + { + "cell_type": "markdown", + "id": "b7e405b4-8f5b-4ba6-8ba1-6310b0518b26", + "metadata": {}, + "source": [ + "## Trace the tree and compute the gain of each split on the test set" + ] + }, + { + "cell_type": "markdown", + "id": "109a4ea6-e487-4d14-9ccf-338994e0cebb", + "metadata": {}, + "source": [ + "### Helper function to calculate the gain of each node" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "99288058-085d-4612-8c75-91cfaae95ec3", + "metadata": {}, + "outputs": [], + "source": [ + "def node_gain(sum_grad, sum_hess, value):\n", + " return -(2.0 * sum_grad * value + sum_hess * value * value)" + ] + }, + { + "cell_type": "markdown", + "id": "20bd13d7-13b9-4bbb-bd2b-30f037e390a7", + "metadata": {}, + "source": [ + "### Helper function to traverse a single tree and accumulate the split gain attributed to each feature" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "98c90aa4-851e-48c0-b0a7-139add05e883", + "metadata": {}, + "outputs": [], + "source": [ + "def dfs_accumulate(gains, tree, grad, hess, predicted_leaves):\n", + " if 'left_child' not in tree:\n", + " leaf_grad = grad[predicted_leaves == tree['leaf_index']].sum()\n", + " leaf_hess = hess[predicted_leaves == tree['leaf_index']].sum()\n", + " leaf_value = tree['leaf_value']\n", + " leaf_gain = node_gain(leaf_grad, leaf_hess, leaf_value)\n", + " return leaf_grad, leaf_hess, leaf_value, leaf_gain\n", + " else:\n", + " l_grad, l_hess, l_val, l_gain = dfs_accumulate(gains, tree['left_child'], grad, hess, predicted_leaves)\n", + " r_grad, r_hess, r_val, r_gain = dfs_accumulate(gains, tree['right_child'], grad, hess, predicted_leaves)\n", + " sum_grad, sum_hess, value = (l_grad + r_grad, l_hess + r_hess, tree['internal_value'])\n", + " now_gain = 
node_gain(sum_grad, sum_hess, value) \n", + " gains[tree['split_feature']] += (l_gain + r_gain) - now_gain\n", + " return sum_grad, sum_hess, value, now_gain" + ] + }, + { + "cell_type": "markdown", + "id": "2e649d33-8a7d-4129-b274-c5a203284e4a", + "metadata": {}, + "source": [ + "### Run the whole computation, utilizing the exposed ObjectiveFunction class" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "907774be-103f-4cd5-9ff3-000b7581ac72", + "metadata": {}, + "outputs": [], + "source": [ + "def compute_out_of_sample_gains(X, y, model, objective):\n", + " booster = model.booster_\n", + " eval_set = lgb.Dataset(X, y).construct()\n", + " fobj = lgb.ObjectiveFunction(objective, {\"objective\": objective}).init(eval_set)\n", + " score = np.zeros(len(y)) \n", + " model_dump = booster.dump_model()\n", + " \n", + " gains = np.zeros(len(model_dump['feature_names']), dtype=np.float64)\n", + " \n", + " for i, tree in enumerate(model_dump['tree_info']):\n", + " grad, hess = fobj.get_gradients(score)\n", + " score += model.predict(X, num_iteration=1, start_iteration=i)\n", + " predicted_leaves = model.predict(X, num_iteration=1, start_iteration=i, pred_leaf=True)\n", + " dfs_accumulate(gains, tree['tree_structure'], grad, hess, predicted_leaves)\n", + "\n", + " return {\n", + " feat: value\n", + " for value, feat in sorted(zip(gains, model_dump['feature_names']), key=lambda x: x[0])\n", + " }\n", + "\n", + "oos_gains = compute_out_of_sample_gains(X_test, y_test, model, \"regression\")" + ] + }, + { + "cell_type": "markdown", + "id": "937a49d1-3cb2-4d03-982a-4938a4f10fde", + "metadata": {}, + "source": [ + "### Examine the plot and deduce that the noise columns are actually harmful, rather than less helpful" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "231d48e6-e9fb-40d3-a030-5e4ec8413b3c", + "metadata": {}, + "outputs": [], + "source": [ + "oos_gains_harmful = {\n", + " column: value\n", + " for column, value in 
oos_gains.items()\n", + " if value < 0\n", + "}" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "ad778404-4112-430d-bdd7-1e35339ba68a", + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAlQAAAHUCAYAAAD8wLZsAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAABL2klEQVR4nO3dfXzO9f////sxm2O2mZlz3mMbas4WbyfldCYM5c270hmNpG8aykmF9/tNo5y8URQ2704+80kqFW8qIoR3vVWIRYxysibmJMxiNTt5/v7w2/HpsE1zvA6Obd2ul8txyfE8nq/X6/E4Xk7uvV7P45jNGGMEAAAAl3l5ugAAAICyjkAFAABgEYEKAADAIgIVAACARQQqAAAAiwhUAAAAFhGoAAAALCJQAQAAWESgAgAAsIhABeCaLF68WDabrcjHU089dV2OuW/fPsXHxys1NfW67N+K1NRU2Ww2LV682NOluGzNmjWKj4/3dBlAmebt6QIAlE1JSUmKiIhwGqtbt+51Oda+ffs0ZcoUde3aVaGhodflGK6qU6eOvvjiCzVs2NDTpbhszZo1WrhwIaEKsIBABcAlzZs3V5s2bTxdhiU5OTmy2Wzy9nb9r0K73a7bbrvNjVXdOFlZWfLz8/N0GUC5wC0/ANfFsmXL1L59e/n7+ysgIEAxMTHatWuX05wdO3bo/vvvV2hoqCpVqqTQ0FA98MAD+uGHHxxzFi9erAEDBkiSoqOjHbcXC26xhYaGasiQIYWO37VrV3Xt2tXxfPPmzbLZbFqyZInGjRunevXqyW636+DBg5KkDRs26Pbbb1dgYKD8/PzUsWNHbdy48Xf7LOqWX3x8vGw2m3bv3q0BAwaoSpUqCg4O1tixY5Wbm6sDBw6oV69eqly5skJDQzVr1iynfRbU+uabb2rs2LGqXbu2KlWqpKioqELvoSR98MEHat++vfz8/FS5cmX16NFDX3zxhdOcgpp27type+65R1WrVlXDhg01ZMgQLVy4UJKcbt8W3F5duHChunTpopo1a8rf318tWrTQrFmzlJOTU+j9bt68ubZv367OnTvLz89P4eHhmjlzpvLz853mZmRkaNy4cQoPD5fdblfNmjXVp08f7d+/3zHn0qVLev755xURESG73a4aNWro4Ycf1unTp3/3nACeQKAC4JK8vDzl5uY6PQpMnz5dDzzwgJo2bap3331XS5Ys0c8//6zOnTtr3759jnmpqam6+eabNW/ePK1bt07//Oc/lZ6errZt2+qnn36SJN1xxx2aPn26pMv/uH/xxRf64osvdMcdd7hU98SJE5WWlqZFixbpww8/VM2aNfXmm2+qZ8+eCgwM1P/+7//q3XffVXBwsGJiYkoUqopz77336pZbbtHy5cv16KOPau7cuRozZoz69++vO+64Q//+97/VrVs3jR8/XitWrCi0/d/+9jcdPnxYr732ml577TUdP35cXbt21eHDhx1z3nrrLfXr10+BgYF6++239frrr+vcuXPq2rWrPv/880L7vOuuu9SoUSO99957WrRokSZNmqR77rlHkhzv7RdffKE6depIkg4dOqQHH3xQS5Ys0UcffaRHHnlEs2fP1mOPPVZo3ydOnNDAgQM1aNAgffDBB+rdu7cmTpyoN9980zHn559/VqdOnfSvf/1LDz/8sD788EMtWrRIN910k9LT0yVJ+fn56tevn2bOnKkHH3xQq1ev1syZM7V+/Xp17dpVv/zyi8vnBLhuDABcg6SkJCOpyEdOTo5JS0sz3t7eZt
SoUU7b/fzzz6Z27drm3nvvLXbfubm55sKFC8bf39+89NJLjvH33nvPSDKbNm0qtE2DBg3M4MGDC41HRUWZqKgox/NNmzYZSaZLly5O8y5evGiCg4NN3759ncbz8vLMLbfcYtq1a3eVd8OYI0eOGEkmKSnJMfbss88aSeaFF15wmtuyZUsjyaxYscIxlpOTY2rUqGHuuuuuQrX++c9/Nvn5+Y7x1NRU4+PjY4YNG+aosW7duqZFixYmLy/PMe/nn382NWvWNB06dChU0+TJkwv1MGLECFOSfw7y8vJMTk6OeeONN0yFChXM2bNnHa9FRUUZSearr75y2qZp06YmJibG8Xzq1KlGklm/fn2xx3n77beNJLN8+XKn8e3btxtJJiEh4XdrBW40rlABcMkbb7yh7du3Oz28vb21bt065ebmKjY21unqla+vr6KiorR582bHPi5cuKDx48erUaNG8vb2lre3twICAnTx4kWlpKRcl7rvvvtup+dbt27V2bNnNXjwYKd68/Pz1atXL23fvl0XL1506Vh33nmn0/MmTZrIZrOpd+/ejjFvb281atTI6TZngQcffFA2m83xvEGDBurQoYM2bdokSTpw4ICOHz+uhx56SF5e//fXeUBAgO6++259+eWXysrKumr/v2fXrl36y1/+omrVqqlChQry8fFRbGys8vLy9N133znNrV27ttq1a+c0FhkZ6dTbxx9/rJtuukndu3cv9pgfffSRgoKC1LdvX6dz0rJlS9WuXdvp9xBQWrAoHYBLmjRpUuSi9JMnT0qS2rZtW+R2v/2H/8EHH9TGjRs1adIktW3bVoGBgbLZbOrTp891u61TcCvrynoLbnsV5ezZs/L397/mYwUHBzs9r1ixovz8/OTr61toPDMzs9D2tWvXLnLsm2++kSSdOXNGUuGepMufuMzPz9e5c+ecFp4XNbc4aWlp6ty5s26++Wa99NJLCg0Nla+vr7Zt26YRI0YUOkfVqlUrtA+73e407/Tp06pfv/5Vj3vy5EllZGSoYsWKRb5ecDsYKE0IVADcqnr16pKk999/Xw0aNCh23vnz5/XRRx/p2Wef1YQJExzj2dnZOnv2bImP5+vrq+zs7ELjP/30k6OW3/rtFZ/f1jt//vxiP61Xq1atEtfjTidOnChyrCC4FPy3YO3Rbx0/flxeXl6qWrWq0/iV/V/NypUrdfHiRa1YscLpXCYnJ5d4H1eqUaOGfvzxx6vOqV69uqpVq6a1a9cW+XrlypVdPj5wvRCoALhVTEyMvL29dejQoaveXrLZbDLGyG63O42/9tprysvLcxormFPUVavQ0FDt3r3baey7777TgQMHigxUV+rYsaOCgoK0b98+jRw58nfn30hvv/22xo4d6whBP/zwg7Zu3arY2FhJ0s0336x69erprbfe0lNPPeWYd/HiRS1fvtzxyb/f89v3t1KlSo7xgv399hwZY/Tqq6+63FPv3r01efJkffrpp+rWrVuRc+6880698847ysvL06233urysYAbiUAFwK1CQ0M1depU/f3vf9fhw4fVq1cvVa1aVSdPntS2bdvk7++vKVOmKDAwUF26dNHs2bNVvXp1hYaGasuWLXr99dcVFBTktM/mzZtLkl555RVVrlxZvr6+CgsLU7Vq1fTQQw9p0KBBiouL0913360ffvhBs2bNUo0aNUpUb0BAgObPn6/Bgwfr7Nmzuueee1SzZk2dPn1a33zzjU6fPq3ExER3v00lcurUKf31r3/Vo48+qvPnz+vZZ5+Vr6+vJk6cKOny7dNZs2Zp4MCBuvPOO/XYY48pOztbs2fPVkZGhmbOnFmi47Ro0UKS9M9//lO9e/dWhQoVFBkZqR49eqhixYp64IEH9Mwzz+jXX39VYmKizp0753JPo0eP1rJly9SvXz9NmDBB7dq10y+//KItW7bozjvvVHR0tO6//34tXbpUffr00ZNPPql27drJx8dHP/74ozZt2qR+/frpr3/9q8
s1ANeFp1fFAyhbCj7lt3379qvOW7lypYmOjjaBgYHGbrebBg0amHvuucds2LDBMefHH380d999t6lataqpXLmy6dWrl/n222+L/OTevHnzTFhYmKlQoYLTp+ry8/PNrFmzTHh4uPH19TVt2rQxn376abGf8nvvvfeKrHfLli3mjjvuMMHBwcbHx8fUq1fP3HHHHcXOL3C1T/mdPn3aae7gwYONv79/oX1ERUWZZs2aFap1yZIl5oknnjA1atQwdrvddO7c2ezYsaPQ9itXrjS33nqr8fX1Nf7+/ub22283//3vf53mFFeTMcZkZ2ebYcOGmRo1ahibzWYkmSNHjhhjjPnwww/NLbfcYnx9fU29evXM008/bT7++ONCn7q8soff9tygQQOnsXPnzpknn3zS1K9f3/j4+JiaNWuaO+64w+zfv98xJycnx8yZM8dx7ICAABMREWEee+wx8/333xc6DuBpNmOM8ViaAwAUsnnzZkVHR+u999676mJ5AKUHX5sAAABgEYEKAADAIm75AQAAWMQVKgAAAIsIVAAAABYRqAAAACziiz3Lmfz8fB0/flyVK1e+ph8xAQDAH50xRj///LPq1q3r9HNHS4JAVc4cP35cISEhni4DAIAy6+jRo/rTn/50TdsQqMqZgh8aevToUQUGBnq4Gtfl5OTok08+Uc+ePeXj4+Ppciyjn9KtPPVTnnqR6Ke0K2/9nD17VmFhYS79AG4CVTlTcJsvMDCwzAcqPz8/BQYGlos/pPRTupWnfspTLxL9lHblsR9JLi2ZYVE6AACARQQqAAAAiwhUAAAAFhGoAAAALCJQAQAAWESgAgAAsIhABQAAYBGBCgAAwCICFQAAgEUEKgAAAIsIVAAAABYRqAAAACwiUAEAAFhEoAIAALDI29MFAAAAhE5Y7ekS5J170eVtuUIFAABgEYEKAADAIgIVAACARQQqAAAAiwhUAAAAFhGoAAAALCJQAQAAWESgAgAAsIhABQAAYBGBCgAAwCICFQAAgEUEKgAAAIsIVAAAABYRqAAAACwiUAEAAFhEoAIAALCIQAUAAGARgQoAAMAiAhUAAIBFBCoAAACLCFQAAAAWEagAAAAsIlABAABYRKACAACwqMwHqvj4eLVs2dLTZQAAgD8wjweqEydOaNSoUQoPD5fdbldISIj69u2rjRs3ero0t9q7d6/uvvtuhYaGymazad68eYXmzJgxQ23btlXlypVVs2ZN9e/fXwcOHLjxxQIAgGvi0UCVmpqq1q1b69NPP9WsWbO0Z88erV27VtHR0RoxYoQnS3O7rKwshYeHa+bMmapdu3aRc7Zs2aIRI0boyy+/1Pr165Wbm6uePXvq4sWLN7haAABwLbw9efC4uDjZbDZt27ZN/v7+jvFmzZpp6NChkqS0tDSNGjVKGzdulJeXl3r16qX58+erVq1aRe6za9euatmypdMVoP79+ysoKEiLFy+WJIWGhmrYsGH67rvvtGLFClWrVk0vv/yyOnTooGHDhmnjxo0KCwtTUlKS2rRpI0lavHixRo8erWXLlmn06NE6evSoOnXqpKSkJNWpU+d3e23btq3atm0rSZowYUKRc9auXev0PCkpSTVr1tTXX3+tLl26FLlNdna2srOzHc8zMzMlSTk5OcrJyfndukqrgtrLcg+/RT+lW3nqpzz1ItFPaefOfuwVjOV9WOWd73oNHgtUZ8+e1dq1azVt2jSnMFUgKChIxhj1799f/v7+2rJli3JzcxUXF6f77rtPmzdvtnT8uXPnavr06Zo0aZLmzp2rhx56SB07dtTQoUM1e/ZsjR8/XrGxsdq7d69sNpuky1eZ5syZoyVLlsjLy0uDBg3SU089paVLl1qqpTjnz5+XJAUHBxc7Z8aMGZoyZUqh8U8++UR+fn7Xpa4baf369Z4uwa3op3QrT/2Up14k+int3NHPrHZuKMSirKx8Pejith4LVAcPHpQxRhEREcXO2bBhg3bv3q0jR44oJC
REkrRkyRI1a9ZM27dvd1zxcUWfPn302GOPSZImT56sxMREtW3bVgMGDJAkjR8/Xu3bt9fJkycdt+hycnK0aNEiNWzYUJI0cuRITZ061eUarsYYo7Fjx6pTp05q3rx5sfMmTpyosWPHOp5nZmYqJCREPXv2VGBg4HWp7UbIycnR+vXr1aNHD/n4+Hi6HMvop3QrT/2Up14k+int3NlP8/h1bqrKdd45rq+E8ligMubyZbWCqz9FSUlJUUhIiCNMSVLTpk0VFBSklJQUS4EqMjLS8euC24ctWrQoNHbq1ClHoPLz83OEKUmqU6eOTp065XINVzNy5Ejt3r1bn3/++VXn2e122e32QuM+Pj7l4g9reemjAP2UbuWpn/LUi0Q/pZ07+snOKz4P3Ch5+a7X4LFF6Y0bN5bNZlNKSkqxc4wxRQau4sYlycvLyxHWChR1b/e3J75gX0WN5efnF7lNwZwrj+UOo0aN0gcffKBNmzbpT3/6k9v3DwAA3MtjgSo4OFgxMTFauHBhkZ9iy8jIUNOmTZWWlqajR486xvft26fz58+rSZMmRe63Ro0aSk9PdzzPy8vTt99+6/4GrgNjjEaOHKkVK1bo008/VVhYmKdLAgAAJeDRr01ISEhQXl6e2rVrp+XLl+v7779XSkqKXn75ZbVv317du3dXZGSkBg4cqJ07d2rbtm2KjY1VVFSU49N3V+rWrZtWr16t1atXa//+/YqLi1NGRsaNbawIly5dUnJyspKTk3Xp0iUdO3ZMycnJOnjwoGPOiBEj9Oabb+qtt95S5cqVdeLECZ04cUK//PKLBysHAAC/x6OBKiwsTDt37lR0dLTGjRun5s2bq0ePHtq4caMSExNls9m0cuVKVa1aVV26dFH37t0VHh6uZcuWFbvPoUOHavDgwY7gFRYWpujo6BvYVdGOHz+uVq1aqVWrVkpPT9ecOXPUqlUrDRs2zDEnMTFR58+fV9euXVWnTh3H42r9AgAAz/Po91BJlxd2L1iwQAsWLCjy9fr162vVqlXFbh8fH6/4+HjHcx8fHyUkJCghIaHYbVJTUwuNXbkWKjQ01GlsyJAhGjJkiNOc/v37l3gN1ZX7K8r1WI8FAACuP4//6BkAAICyjkDlJgEBAcU+PvvsM0+XBwAAriOP3/IrL5KTk4t9rV69ejeuEAAAcMMRqNykUaNGni4BAAB4CLf8AAAALCJQAQAAWESgAgAAsIhABQAAYBGBCgAAwCICFQAAgEUEKgAAAIsIVAAAABYRqAAAACwiUAEAAFhEoAIAALCIQAUAAGARgQoAAMAiAhUAAIBFBCoAAACLCFQAAAAWEagAAAAsIlABAABY5O3pAgAAAFJn3uHpEnTmzBlVf8G1bblCBQAAYBGBCgAAwCICFQAAgEUEKgAAAIsIVAAAABYRqAAAACwiUAEAAFhEoAIAALCIQAUAAGARgQoAAMAiAhUAAIBFBCoAAACLCFQAAAAWeXu6AAAA8McQOmG1p0u4Ku/ciy5vyxUqAAAAiwhUAAAAFhGoAAAALCJQAQAAWESgAgAAsIhABQAAYBGBCgAAwCICFQAAgEUEKgAAAIsIVAAAABYRqAAAACwiUAEAAFhEoAIAALCIQAUAAGARgQoAAMAiAhUAAIBFBCoAAACLCFQAAAAWEagAAAAsIlABAABYRKACAACwiEAFAABgEYEKAADAIgIVAACARWU+UMXHx6tly5aeLgMAAPyBeTxQnThxQqNGjVJ4eLjsdrtCQkLUt29fbdy40dOludXevXt19913KzQ0VDabTfPmzStyXkJCgsLCwuTr66vWrVvrs88+u7GFAgCAa+bRQJWamqrWrVvr008/1axZs7Rnzx6tXbtW0dHRGjFihCdLc7usrCyFh4dr5syZql27dpFzli1bptGjR+vvf/+7du3apc6dO6t3795KS0u7wdUCAIBr4e3Jg8fFxclms2nbtm3y9/d3jDdr1kxDhw6VJKWlpWnUqFHauHGjvL
y81KtXL82fP1+1atUqcp9du3ZVy5Ytna4A9e/fX0FBQVq8eLEkKTQ0VMOGDdN3332nFStWqFq1anr55ZfVoUMHDRs2TBs3blRYWJiSkpLUpk0bSdLixYs1evRoR+g5evSoOnXqpKSkJNWpU+d3e23btq3atm0rSZowYUKRc1588UU98sgjGjZsmCRp3rx5WrdunRITEzVjxowit8nOzlZ2drbjeWZmpiQpJydHOTk5v1tXaVVQe1nu4bfop3QrT/2Up14k+intrrUfewVzPcuxzDvf9fo8FqjOnj2rtWvXatq0aU5hqkBQUJCMMerfv7/8/f21ZcsW5ebmKi4uTvfdd582b95s6fhz587V9OnTNWnSJM2dO1cPPfSQOnbsqKFDh2r27NkaP368YmNjtXfvXtlsNkmXrzLNmTNHS5YskZeXlwYNGqSnnnpKS5cutVSLJF26dElff/11obDVs2dPbd26tdjtZsyYoSlTphQa/+STT+Tn52e5Lk9bv369p0twK/op3cpTP+WpF4l+SruS9jOr3XUuxKKsrHw96OK2HgtUBw8elDFGERERxc7ZsGGDdu/erSNHjigkJESStGTJEjVr1kzbt293XPFxRZ8+ffTYY49JkiZPnqzExES1bdtWAwYMkCSNHz9e7du318mTJx236HJycrRo0SI1bNhQkjRy5EhNnTrV5Rp+66efflJeXl6hK2+1atXSiRMnit1u4sSJGjt2rON5ZmamQkJC1LNnTwUGBrqlNk/IycnR+vXr1aNHD/n4+Hi6HMvop3QrT/2Up14k+intrrWf5vHrbkBVrvPOcX0llMcClTGXL6sVXP0pSkpKikJCQhxhSpKaNm2qoKAgpaSkWApUkZGRjl8XhJgWLVoUGjt16pQjUPn5+TnClCTVqVNHp06dcrmGolz5fhhjrvoe2e122e32QuM+Pj7l4g9reemjAP2UbuWpn/LUi0Q/pV1J+8nOK/7fs9IgL9/1+jy2KL1x48ay2WxKSUkpdk5xYeJqIcPLy8sR1goUdW/3tye+YF9FjeXn5xe5TcGcK4/lqurVq6tChQqFrkadOnWq2PViAACgdPBYoAoODlZMTIwWLlyoixcvFno9IyNDTZs2VVpamo4ePeoY37dvn86fP68mTZoUud8aNWooPT3d8TwvL0/ffvut+xtws4oVK6p169aF7kOvX79eHTp08FBVAACgJDz6tQkJCQnKy8tTu3bttHz5cn3//fdKSUnRyy+/rPbt26t79+6KjIzUwIEDtXPnTm3btk2xsbGKiopyfPruSt26ddPq1au1evVq7d+/X3FxccrIyLixjRXh0qVLSk5OVnJysi5duqRjx44pOTlZBw8edMwZO3asXnvtNf3P//yPUlJSNGbMGKWlpWn48OEerBwAAPwej35tQlhYmHbu3Klp06Zp3LhxSk9PV40aNdS6dWslJibKZrNp5cqVGjVqlLp06eL0tQnFGTp0qL755hvFxsbK29tbY8aMUXR09A3sqmjHjx9Xq1atHM/nzJmjOXPmKCoqyvGJxfvuu09nzpzR1KlTlZ6erubNm2vNmjVq0KCBh6oGAAAl4dFAJV1e2L1gwQItWLCgyNfr16+vVatWFbt9fHy84uPjHc99fHyUkJCghISEYrdJTU0tNHblWqjQ0FCnsSFDhmjIkCFOc/r371/iNVRX7q84cXFxiouLK9E+AQBA6eDxHz0DAABQ1hGo3CQgIKDYBz+PDwCA8s3jt/zKi+Tk5GJfq1ev3o0rBAAA3HAEKjdp1KiRp0sAAAAewi0/AAAAiwhUAAAAFhGoAAAALCJQAQAAWESgAgAAsIhABQAAYBGBCgAAwCICFQAAgEUEKgAAAIsIVAAAABYRqAAAACwiUAEAAFhEoAIAALCIQAUAAGARgQoAAMAiAhUAAIBFBCoAAACLvD1dAAAA+GNInXmHp0u4qjNnzqj6C65tyxUqAAAAiwhUAAAAFhGoAAAALCJQAQAAWESgAg
AAsIhABQAAYBGBCgAAwCICFQAAgEUEKgAAAIsIVAAAABYRqAAAACwiUAEAAFhEoAIAALCIQAUAAGCRt6cLAAAA7hE6YfUNPZ69gtGsdlLz+HXKzrPd0GNfD965F13elitUAAAAFhGoAAAALHJboMrIyHDXrgAAAMoUlwLVP//5Ty1btszx/N5771W1atVUr149ffPNN24rDgAAoCxwKVD961//UkhIiCRp/fr1Wr9+vT7++GP17t1bTz/9tFsLBAAAKO1c+pRfenq6I1B99NFHuvfee9WzZ0+Fhobq1ltvdWuBAAAApZ1LV6iqVq2qo0ePSpLWrl2r7t27S5KMMcrLy3NfdQAAAGWAS1eo7rrrLj344INq3Lixzpw5o969e0uSkpOT1ahRI7cWCAAAUNq5FKjmzp2r0NBQHT16VLNmzVJAQICky7cC4+Li3FogAABAaedSoPLx8dFTTz1VaHz06NFW6wEAAChzXP4eqiVLlqhTp06qW7eufvjhB0nSvHnztGrVKrcVBwAAUBa4FKgSExM1duxY9e7dWxkZGY6F6EFBQZo3b5476wMAACj1XApU8+fP16uvvqq///3vqlChgmO8TZs22rNnj9uKAwAAKAtcClRHjhxRq1atCo3b7XZdvOj6T2oGAAAoi1wKVGFhYUpOTi40/vHHH6tp06ZWawIAAChTXPqU39NPP60RI0bo119/lTFG27Zt09tvv60ZM2botddec3eNAAAApZpLgerhhx9Wbm6unnnmGWVlZenBBx9UvXr19NJLL+n+++93d40AAACl2jUHqtzcXC1dulR9+/bVo48+qp9++kn5+fmqWbPm9agPAACg1LvmNVTe3t56/PHHlZ2dLUmqXr06YQoAAPyhubQo/dZbb9WuXbvcXQsAAECZ5NIaqri4OI0bN04//vijWrduLX9/f6fXIyMj3VIcAABAWeBSoLrvvvskSU888YRjzGazyRgjm83m+OZ0AACAPwKXAtWRI0fcXQcAAECZ5VKgatCggbvrcFl8fLxWrlxZ5BeNAgAA3AguBao33njjqq/HxsaWeF8nTpzQtGnTtHr1ah07dkw1a9ZUy5YtNXr0aN1+++2ulFcq7d27V5MnT9bXX3+tH374QXPnztXo0aOd5vznP//R7Nmz9fXXXys9PV3//ve/1b9/f4/UCwAASs6lQPXkk086Pc/JyVFWVpYqVqwoPz+/Egeq1NRUdezYUUFBQZo1a5YiIyOVk5OjdevWacSIEdq/f78r5ZVKWVlZCg8P14ABAzRmzJgi51y8eFG33HKLHn74Yd199903uEIAAOAql7424dy5c06PCxcu6MCBA+rUqZPefvvtEu8nLi5ONptN27Zt0z333KObbrpJzZo109ixY/Xll19KktLS0tSvXz8FBAQoMDBQ9957r06ePFnsPrt27Vroyk///v01ZMgQx/PQ0FA9//zzio2NVUBAgBo0aKBVq1bp9OnTjmO1aNFCO3bscGyzePFiBQUFad26dWrSpIkCAgLUq1cvpaenl6jXtm3bavbs2br//vtlt9uLnNO7d289//zzuuuuu0q0TwAAUDq4dIWqKI0bN9bMmTM1aNCgEl1ZOnv2rNauXatp06YV+toFSQoKCpIxRv3795e/v7+2bNmi3NxcxcXF6b777tPmzZst1Tt37lxNnz5dkyZN0ty5c/XQQw+pY8eOGjp0qGbPnq3x48crNjZWe/fulc1mk3T5KtOcOXO0ZMkSeXl5adCgQXrqqae0dOlSS7VYkZ2d7fiSVUnKzMyUdPmqYU5OjqfKsqyg9rLcw2/RT+lWnvopT71I9HOt7BXMddlvscfzMk7/Leu8813vw22BSpIqVKig48ePl2juwYMHZYxRREREsXM2bNig3bt368iRIwoJCZEkLVmyRM2aNdP27dvVtm1bl2vt06ePHnvsMUnS5MmTlZiYqLZt22rAgAGSpPHjx6t9+/Y6efKkateuLenyH4BFixapYcOGkq
SRI0dq6tSpLtfgDjNmzNCUKVMKjX/yySfy8/PzQEXutX79ek+X4Fb0U7qVp37KUy8S/ZTUrHbXZbe/67k2+Z45sJtlZeXrQRe3dSlQffDBB07PjTFKT0/XggUL1LFjxxLtw5jLKbDg6k9RUlJSFBIS4ghTktS0aVMFBQUpJSXFUqD67ZeP1qpVS5LUokWLQmOnTp1yBCo/Pz9HmJKkOnXq6NSpUy7X4A4TJ07U2LFjHc8zMzMVEhKinj17KjAw0IOVWZOTk6P169erR48e8vHx8XQ5ltFP6Vae+ilPvUj0c62ax69z+z6vxu5l9FybfE3a4aXs/OL/PS8rvHNcWgl1eVtXNrryk2c2m001atRQt27d9MILL5RoH40bN5bNZlNKSkqxn2Qr+KLQko5LkpeXlyOsFSjq0upvfyMX7Kuosfz8/CK3KZhz5bFuNLvdXuSaLB8fn3Lxl0956aMA/ZRu5amf8tSLRD8llZ3nmVCTnW/z2LHdKc9CKHQpiuXn5zs98vLydOLECb311luqU6dOifYRHBysmJgYLVy4UBcvXiz0ekZGhpo2baq0tDQdPXrUMb5v3z6dP39eTZo0KXK/NWrUcFoonpeXp2+//fYaOwQAACg5lwLV1KlTlZWVVWj8l19+uaY1RQkJCcrLy1O7du20fPlyff/990pJSdHLL7+s9u3bq3v37oqMjNTAgQO1c+dObdu2TbGxsYqKilKbNm2K3Ge3bt20evVqrV69Wvv371dcXJwyMjJcadOtLl26pOTkZCUnJ+vSpUs6duyYkpOTdfDgQcecCxcuOOZIl7+RPjk5WWlpaR6qGgAAlIRLgWrKlCm6cOFCofGsrKwiF0gXJywsTDt37lR0dLTGjRun5s2bq0ePHtq4caMSExNls9m0cuVKVa1aVV26dFH37t0VHh6uZcuWFbvPoUOHavDgwY7gFRYWpujoaFfadKvjx4+rVatWatWqldLT0zVnzhy1atVKw4YNc8zZsWOHY44kjR07Vq1atdLkyZM9VTYAACgBl9ZQFbeG6ZtvvlFwcPA17atOnTpasGCBFixYUOTr9evX16pVq4rdPj4+XvHx8Y7nPj4+SkhIUEJCQrHbpKamFhq7ci1UaGio09iQIUOcvstKuryWrKRrqK7cX1G6du3q8TVZAADg2l1ToKpatapsNptsNptuuukmp1CVl5enCxcuaPjw4W4vEgAAoDS7pkA1b948GWM0dOhQTZkyRVWqVHG8VrFiRYWGhqp9+/ZuL7IsCAgIKPa1jz/+WJ07d76B1QAAgBvpmgLV4MGDJV1e+9ShQ4dy9RFWqwoWkhelXr16N64QAABww7m0hioqKsrx619++aXQ9zyV5S+UdFWjRo08XQIAAPAQlz7ll5WVpZEjR6pmzZoKCAhQ1apVnR4AAAB/JC4FqqefflqffvqpEhISZLfb9dprr2nKlCmqW7eu3njjDXfXCAAAUKq5dMvvww8/1BtvvKGuXbtq6NCh6ty5sxo1aqQGDRpo6dKlGjhwoLvrBAAAKLVcukJ19uxZhYWFSbq8Xurs2bOSpE6dOuk///mP+6oDAAAoA1wKVOHh4Y4vx2zatKneffddSZevXAUFBbmrNgAAgDLBpUD18MMP65tvvpEkTZw40bGWasyYMXr66afdWiAAAEBp59IaqjFjxjh+HR0drf3792vHjh1q2LChbrnlFrcVBwAAUBa4FKh+69dff1X9+vVVv359d9QDAABQ5rh0yy8vL0/PPfec6tWrp4CAAB0+fFiSNGnSJL3++utuLRAAAKC0cylQTZs2TYsXL9asWbNUsWJFx3iLFi302muvua04AACAssClQPXGG2/olVde0cCBA1WhQgXHeGRkpPbv3++24gAAAMoClwLVsWPHivzZdfn5+YV+rh8AAEB551KgatasmT777LNC4++9955atWpluSgAAICyxKVP+T377LN66KGHdOzYMeXn52vFihU6cOCA3njjDX300Ufurh
EAAKBUu6YrVIcPH5YxRn379tWyZcu0Zs0a2Ww2TZ48WSkpKfrwww/Vo0eP61UrAABAqXRNV6gaN26s9PR01axZUzExMfqf//kfHTx4ULVr175e9QEAAJR613SFyhjj9Pzjjz9WVlaWWwsCAAAoa1xalF7gyoAFAADwR3RNt/xsNptsNluhMQAA4HmpM++4ocfLycnRmjVr9G18jHx8fG7osa+HM2fOqPoLrm17TYHKGKMhQ4bIbrdLuvxz/IYPHy5/f3+neStWrHCtGgAAgDLomgLV4MGDnZ4PGjTIrcUAAACURdcUqJKSkq5XHQAAAGWWpUXpAAAAIFABAABYRqACAACwiEAFAABgEYEKAADAIgIVAACARQQqAAAAiwhUAAAAFhGoAAAALCJQAQAAWESgAgAAsOiafpYfAAC4PkInrPZ0CdfMXsFoVjupefw6ZefZPF2OZd65F13elitUAAAAFhGoAAAALCJQAQAAWESgAgAAsIhABQAAYBGBCgAAwCICFQAAgEUEKgAAAIsIVAAAABYRqAAAACwiUAEAAFhEoAIAALCIQAUAAGARgQoAAMAiAhUAAIBFBCoAAACLCFQAAAAWEagAAAAsIlABAABYRKACAACwiEAFAABgEYEKAADAIgIVAACARWU+UMXHx6tly5aeLgMAAPyBeTxQnThxQqNGjVJ4eLjsdrtCQkLUt29fbdy40dOludXevXt19913KzQ0VDabTfPmzbvq/BkzZshms2n06NE3pD4AAOA6b08ePDU1VR07dlRQUJBmzZqlyMhI5eTkaN26dRoxYoT279/vyfLcKisrS+Hh4RowYIDGjBlz1bnbt2/XK6+8osjIyBtUHQAAsMKjV6ji4uJks9m0bds23XPPPbrpppvUrFkzjR07Vl9++aUkKS0tTf369VNAQIACAwN177336uTJk8Xus2vXroWu6vTv319DhgxxPA8NDdXzzz+v2NhYBQQEqEGDBlq1apVOnz7tOFaLFi20Y8cOxzaLFy9WUFCQ1q1bpyZNmiggIEC9evVSenp6iXpt27atZs+erfvvv192u73YeRcuXNDAgQP16quvqmrVqiXaNwAA8CyPXaE6e/as1q5dq2nTpsnf37/Q60FBQTLGqH///vL399eWLVuUm5uruLg43Xfffdq8ebOl48+dO1fTp0/XpEmTNHfuXD300EPq2LGjhg4dqtmzZ2v8+PGKjY3V3r17ZbPZJF2+yjRnzhwtWbJEXl5eGjRokJ566iktXbrUUi2/NWLECN1xxx3q3r27nn/++d+dn52drezsbMfzzMxMSVJOTo5ycnLcVteNVlB7We7ht+indCtP/ZSnXqQ/Vj/2CuZGl2OZ3cs4/bes8853vQ+PBaqDBw/KGKOIiIhi52zYsEG7d+/WkSNHFBISIklasmSJmjVrpu3bt6tt27YuH79Pnz567LHHJEmTJ09WYmKi2rZtqwEDBkiSxo8fr/bt2+vkyZOqXbu2pMt/ABYtWqSGDRtKkkaOHKmpU6e6XMOV3nnnHe3cuVPbt28v8TYzZszQlClTCo1/8skn8vPzc1ttnrJ+/XpPl+BW9FO6lad+ylMv0h+jn1ntPFCImzzXJt/TJbhFVla+HnRxW48FKmMup8CCqz9FSUlJUUhIiCNMSVLTpk0VFBSklJQUS4Hqt+uTatWqJUlq0aJFobFTp045ApWfn58jTElSnTp1dOrUKZdr+K2jR4/qySef1CeffCJfX98Sbzdx4kSNHTvW8TwzM1MhISHq2bOnAgMD3VKbJ+Tk5Gj9+vXq0aOHfHx8PF2OZfRTupWnfspTL9Ifq5/m8es8VJXr7F5Gz7XJ16QdXsrOL/7f87LCO8f1lVAeC1SNGzeWzWZTSkqK+vfvX+QcY0yRgau4cUny8vJyhLUCRV1a/e1v5IJ9FTWWn59f5DYFc648lqu+/vprnTp1Sq1bt3aM5eXl6T//+Y8WLFig7OxsVahQodB2dru9yDVZPj4+5e
Ivn/LSRwH6Kd3KUz/lqRfpj9FPdl7ZDSTZ+bYyXX+BPAuh0GOL0oODgxUTE6OFCxfq4sWLhV7PyMhQ06ZNlZaWpqNHjzrG9+3bp/Pnz6tJkyZF7rdGjRpOC8Xz8vL07bffur8BN7v99tu1Z88eJScnOx5t2rTRwIEDlZycXGSYAgAApYNHvzYhISFBHTp0ULt27TR16lRFRkYqNzdX69evV2Jiovbt26fIyEgNHDhQ8+bNcyxKj4qKUps2bYrcZ7du3TR27FitXr1aDRs21Ny5c5WRkXFjGyvCpUuXtG/fPsevjx07puTkZAUEBKhRo0aqXLmymjdv7rSNv7+/qlWrVmgcAACULh792oSwsDDt3LlT0dHRGjdunJo3b64ePXpo48aNSkxMlM1m08qVK1W1alV16dJF3bt3V3h4uJYtW1bsPocOHarBgwcrNjZWUVFRCgsLU3R09A3sqmjHjx9Xq1at1KpVK6Wnp2vOnDlq1aqVhg0b5unSAACARR69QiVdXti9YMECLViwoMjX69evr1WrVhW7fXx8vOLj4x3PfXx8lJCQoISEhGK3SU1NLTR25Vqo0NBQp7EhQ4Y4fZeVdPn7rUq6hurK/ZWE1a+GAAAAN4bHf/QMAABAWUegcpOAgIBiH5999pmnywMAANeRx2/5lRfJycnFvlavXr0bVwgAALjhCFRu0qhRI0+XAAAAPIRbfgAAABYRqAAAACwiUAEAAFhEoAIAALCIQAUAAGARgQoAAMAiAhUAAIBFBCoAAACLCFQAAAAWEagAAAAsIlABAABYRKACAACwiEAFAABgEYEKAADAIgIVAACARQQqAAAAiwhUAAAAFhGoAAAALPL2dAEAAEBKnXmHp0u4Zjk5OVqzZo2+jY+Rj4+Pp8ux7MyZM6r+gmvbcoUKAADAIgIVAACARQQqAAAAiwhUAAAAFhGoAAAALCJQAQAAWESgAgAAsIhABQAAYBGBCgAAwCICFQAAgEUEKgAAAIsIVAAAABYRqAAAACwiUAEAAFhkM8YYTxcB98nMzFSVKlV0/vx5BQYGunXfoRNWu3V/V2OvYDSrXZ6e2VZB2Xm2G3bc64V+Srfy1E956kWin9KuvPXjnXtRh164z6V/Q7lCBQAAYBGBCgAAwCICFQAAgEUEKgAAAIsIVAAAABYRqAAAACwiUAEAAFhEoAIAALCIQAUAAGARgQoAAMAiAhUAAIBFBCoAAACLCFQAAAAWEagAAAAsIlABAABYRKACAACwiEAFAABgEYEKAADAIgIVAACARQQqAAAAiwhUAAAAFhGoAAAALCJQAQAAWPSHCFTx8fFq2bKlp8sAAADlVJkIVCdOnNCoUaMUHh4uu92ukJAQ9e3bVxs3bvR0aW6Vk5OjqVOnqmHDhvL19dUtt9yitWvXerosAADwO7w9XcDvSU1NVceOHRUUFKRZs2YpMjJSOTk5WrdunUaMGKH9+/d7ukS3+cc//qE333xTr776qiIiIrRu3Tr99a9/1datW9WqVStPlwcAAIpR6q9QxcXFyWazadu2bbrnnnt00003qVmzZho7dqy+/PJLSVJaWpr69eungIAABQYG6t5779XJkyeL3WfXrl01evRop7H+/ftryJAhjuehoaF6/vnnFRsbq4CAADVo0ECrVq3S6dOnHcdq0aKFduzY4dhm8eLFCgoK0rp169SkSRMFBASoV69eSk9PL1GvS5Ys0d/+9jf16dNH4eHhevzxxxUTE6MXXnih5G8YAAC44Ur1FaqzZ89q7dq1mjZtmvz9/Qu9HhQUJGOM+vfvL39/f23ZskW5ubmKi4vTfffdp82bN1s6/ty5czV9+nRNmjRJc+fO1UMPPaSOHTtq6NChmj17tsaPH6/Y2Fjt3btXNptNkpSVlaU5c+ZoyZIl8vLy0qBBg/TUU09p6dKlv3u87Oxs+fr6Oo1VqlRJn3/++VW3yc7OdjzPzMyUdPn2YU5OjittF8tewbh1f1c9lpdx+m9ZRz
+lW3nqpzz1ItFPaVfe+vHOd72PUh2oDh48KGOMIiIiip2zYcMG7d69W0eOHFFISIiky1d6mjVrpu3bt6tt27YuH79Pnz567LHHJEmTJ09WYmKi2rZtqwEDBkiSxo8fr/bt2+vkyZOqXbu2pMtBZtGiRWrYsKEkaeTIkZo6dWqJjhcTE6MXX3xRXbp0UcOGDbVx40atWrVKeXl5xW4zY8YMTZkypdD4J598Ij8/v2vq9/fMaufW3ZXIc23yb/xBryP6Kd3KUz/lqReJfkq78tJPVla+HnRx21IdqIy5nBQLrv4UJSUlRSEhIY4wJUlNmzZVUFCQUlJSLAWqyMhIx69r1aolSWrRokWhsVOnTjkClZ+fnyNMSVKdOnV06tSpEh3vpZde0qOPPqqIiAjZbDY1bNhQDz/8sJKSkordZuLEiRo7dqzjeWZmpkJCQtSzZ08FBgaW6Lgl1Tx+nVv3dzV2L6Pn2uRr0g4vZecXf/7LCvop3cpTP+WpF4l+Srvy1o93jusroUp1oGrcuLFsNptSUlLUv3//IucYY4oMXMWNS5KXl5cjrBUo6vaYj4+P49cF+ypqLD8/v8htCuZceazi1KhRQytXrtSvv/6qM2fOqG7dupowYYLCwsKK3cZut8tutxdZ+5W1WJWdd+P/sGTn2zxy3OuFfkq38tRPeepFop/Srrz0k2chFJbqRenBwcGKiYnRwoULdfHixUKvZ2RkqGnTpkpLS9PRo0cd4/v27dP58+fVpEmTIvdbo0YNp4XieXl5+vbbb93fgIt8fX1Vr1495ebmavny5erXr5+nSwIAAFdRqgOVJCUkJCgvL0/t2rXT8uXL9f333yslJUUvv/yy2rdvr+7duysyMlIDBw7Uzp07tW3bNsXGxioqKkpt2rQpcp/dunXT6tWrtXr1au3fv19xcXHKyMi4sY0V4auvvtKKFSt0+PBhffbZZ+rVq5fy8/P1zDPPeLo0AABwFaU+UIWFhWnnzp2Kjo7WuHHj1Lx5c/Xo0UMbN25UYmKibDabVq5cqapVq6pLly7q3r27wsPDtWzZsmL3OXToUA0ePNgRvMLCwhQdHX0Duyrar7/+qn/84x9q2rSp/vrXv6pevXr6/PPPFRQU5OnSAADAVdhMSRf4oEzIzMxUlSpVdP78ebcvSg+dsNqt+7saewWjWe3y9My2CuXivjz9lG7lqZ/y1ItEP6VdeevHO/eiDr1wn0v/hpb6K1QAAAClHYHqBgoICCj28dlnn3m6PAAA4KJS/bUJ5U1ycnKxr9WrV+/GFQIAANyKQHUDNWrUyNMlAACA64BbfgAAABYRqAAAACwiUAEAAFhEoAIAALCIQAUAAGARgQoAAMAiAhUAAIBFBCoAAACLCFQAAAAWEagAAAAsIlABAABYRKACAACwiEAFAABgEYEKAADAIgIVAACARQQqAAAAiwhUAAAAFhGoAAAALLIZY4yni4D7ZGZmqkqVKjp//rwCAwM9XY7LcnJytGbNGvXp00c+Pj6eLscy+indylM/5akXiX5Ku/LWz5kzZ1S9enWX/g3lChUAAIBFBCoAAACLCFQAAAAWEagAAAAsIlABAABYRKACAACwiEAFAABgEYEKAADAIgIVAACARQQqAAAAiwhUAAAAFhGoAAAALCJQAQAAWESgAgAAsIhABQAAYBGBCgAAwCJvTxcA9zLGSJIyMzM9XIk1OTk5ysrKUmZmpnx8fDxdjmX0U7qVp37KUy8S/ZR25a2fn3/+WdL//Vt6LQhU5UzBb4aQkBAPVwIAQNl05swZValS5Zq2sRlXYhhKrfz8fB0/flyVK1eWzWbzdDkuy8zMVEhIiI4eParAwEBPl2MZ/ZRu5amf8tSLRD+lXXnr5/z586pfv77OnTunoKCga9qWK1TljJeXl/70pz95ugy3CQwMLBd/SAvQT+lWnvopT71I9FPalbd+vLyufYk5i9IBAAAsIlABAABYRKBCqWS32/Xss8/Kbrd7uhS3oJ
/SrTz1U556keintKOf/8OidAAAAIu4QgUAAGARgQoAAMAiAhUAAIBFBCoAAACLCFTwiOzsbLVs2VI2m03JyclOr6Wlpalv377y9/dX9erV9cQTT+jSpUtOc/bs2aOoqChVqlRJ9erV09SpUwv97KUtW7aodevW8vX1VXh4uBYtWuT2Pv7yl7+ofv368vX1VZ06dfTQQw/p+PHjTnNsNluhx5W1lKV+ysr5SU1N1SOPPKKwsDBVqlRJDRs21LPPPluo1rJwfkraS1k5N5I0bdo0dejQQX5+fsV+I3VZODfX0k9ZOj9FCQ0NLXQ+JkyY4DTHXT16SkJCgsLCwuTr66vWrVvrs88+K/nGBvCAJ554wvTu3dtIMrt27XKM5+bmmubNm5vo6Gizc+dOs379elO3bl0zcuRIx5zz58+bWrVqmfvvv9/s2bPHLF++3FSuXNnMmTPHMefw4cPGz8/PPPnkk2bfvn3m1VdfNT4+Pub99993ax8vvvii+eKLL0xqaqr573//a9q3b2/at2/vNEeSSUpKMunp6Y5HVlZWmeynLJ2fjz/+2AwZMsSsW7fOHDp0yKxatcrUrFnTjBs3zmleWTg/JemlLJ0bY4yZPHmyefHFF83YsWNNlSpVipxTFs5NSfspa+enKA0aNDBTp051Oh8///yz23v0lHfeecf4+PiYV1991ezbt888+eSTxt/f3/zwww8l2p5AhRtuzZo1JiIiwuzdu7dQoFqzZo3x8vIyx44dc4y9/fbbxm63m/PnzxtjjElISDBVqlQxv/76q2POjBkzTN26dU1+fr4xxphnnnnGREREOB33scceM7fddtt17MyYVatWGZvNZi5duuQYk2T+/e9/F7tNWeqnrJ+fWbNmmbCwMKexsnp+ruylrJ6bpKSkqwaqsnZuiuunrJ6f32rQoIGZO3dusa+7q0dPadeunRk+fLjTWEREhJkwYUKJtueWH26okydP6tFHH9WSJUvk5+dX6PUvvvhCzZs3V926dR1jMTExys7O1tdff+2YExUV5fTFazExMTp+/LhSU1Mdc3r27Om075iYGO3YsUM5OTnXoTPp7NmzWrp0qTp06CAfHx+n10aOHKnq1aurbdu2WrRokfLz8x2vlaV+yvL5kS7/4NPg4OBC42Xx/FzZS1k/N8Upi+emKOXl/Pzzn/9UtWrV1LJlS02bNs3pdp67evSES5cu6euvvy703vbs2VNbt24t0T4IVLhhjDEaMmSIhg8frjZt2hQ558SJE6pVq5bTWNWqVVWxYkWdOHGi2DkFz39vTm5urn766Se39FNg/Pjx8vf3V7Vq1ZSWlqZVq1Y5vf7cc8/pvffe04YNG3T//fdr3Lhxmj59uuP1stRPWTw/BQ4dOqT58+dr+PDhTuNl7fwU10tZPjfFKYvnpjjl4fw8+eSTeuedd7Rp0yaNHDlS8+bNU1xcnON1d/XoCT/99JPy8vKKrK2kdRGoYFl8fHyRi0d/+9ixY4fmz5+vzMxMTZw48ar7s9lshcaMMU7jV84x//+CxmudY6WfAk8//bR27dqlTz75RBUqVFBsbKzTAst//OMfat++vVq2bKlx48Zp6tSpmj179lV7Ls39lLXzI0nHjx9Xr169NGDAAA0bNszpNU+eH3f3UhbPzdWUtT87v8fT56co19LjmDFjFBUVpcjISA0bNkyLFi3S66+/rjNnzri9R08pqraS1uV9PQrCH8vIkSN1//33X3VOaGionn/+eX355ZeFfkZSmzZtNHDgQP3v//6vateura+++srp9XPnziknJ8fxfw61a9cu9H8Mp06dkqTfnePt7a1q1aq5pZ8C1atXV/Xq1XXTTTepSZMmCgkJ0Zdffqn27dsXue1tt92mzMxMnTx5UrVq1SpT/ZTF83P8+HFFR0erffv2euWVV666nXRjz487eymL5+ZalfY/O1dTGs5PUaz0eNttt0mSDh48qGrVqrmtR0+oXr26KlSoUGRtJa7LygIu4F
r88MMPZs+ePY7HunXrjCTz/vvvm6NHjxpj/m9R4/Hjxx3bvfPOO4UWNQYFBZns7GzHnJkzZxZauNmkSROn4w8fPvy6L9xMS0szksymTZuKnTN//nzj6+vrWJRZlvopa+fnxx9/NI0bNzb333+/yc3NLdE2pfX8/F4vZe3cFLjaovQrldZz81u/tyi9rJ2fq/nwww+NJMen4NzVo6e0a9fOPP74405jTZo0KfGidAIVPObIkSPFfm3C7bffbnbu3Gk2bNhg/vSnPzl97DYjI8PUqlXLPPDAA2bPnj1mxYoVJjAwsMiPFo8ZM8bs27fPvP76627/aPFXX31l5s+fb3bt2mVSU1PNp59+ajp16mQaNmzo+Av/gw8+MK+88orZs2ePOXjwoHn11VdNYGCgeeKJJ8pkP2Xp/Bw7dsw0atTIdOvWzfz4449OH/UuUFbOT0l6KUvnxpjL/4O1a9cuM2XKFBMQEGB27dpldu3a5fgYflk5NyXtp6ydnytt3brVvPjii2bXrl3m8OHDZtmyZaZu3brmL3/5i2OOu3r0lIKvTXj99dfNvn37zOjRo42/v79JTU0t0fYEKnhMUYHKmMt/Md1xxx2mUqVKJjg42IwcOdLpI7bGGLN7927TuXNnY7fbTe3atU18fHyh/7vZvHmzadWqlalYsaIJDQ01iYmJbq1/9+7dJjo62gQHBxu73W5CQ0PN8OHDzY8//uiY8/HHH5uWLVuagIAA4+fnZ5o3b27mzZtncnJyymQ/xpSd85OUlGQkFfkoUFbOT0l6MabsnBtjjBk8eHCR/RRcDS0r56ak/RhTts7Plb7++mtz6623mipVqhhfX19z8803m2effdZcvHjRaZ67evSUhQsXmgYNGpiKFSuaP//5z2bLli0l3tZmTCn5elIAAIAyik/5AQAAWESgAgAAsIhABQAAYBGBCgAAwCICFQAAgEUEKgAAAIsIVAAAABYRqAAAACwiUAGAG3Tt2lWjR4/2dBkAPIRABeC6GzJkiGw2W6HHwYMH3bL/xYsXKygoyC37ctWKFSv03HPPebSGq9m8ebNsNpsyMjI8XQpQLnl7ugAAfwy9evVSUlKS01iNGjU8VE3xcnJy5OPjc83bBQcHX4dq3CMnJ8fTJQDlHleoANwQdrtdtWvXdnpUqFBBkvThhx+qdevW8vX1VXh4uKZMmaLc3FzHti+++KJatGghf39/hYSEKC4uThcuXJB0+crLww8/rPPnzzuufMXHx0uSbDabVq5c6VRHUFCQFi9eLElKTU2VzWbTu+++q65du8rX11dvvvmmJCkpKUlNmjSRr6+vIiIilJCQcNX+rrzlFxoaqueff16xsbEKCAhQgwYNtGrVKp0+fVr9+vVTQECAWrRooR07dji2KbjStnLlSt10003y9fVVjx49dPToUadjJSYmqmHDhqpYsaJuvvlmLVmyxOl1m82mRYsWqV+/fvL399ewYcMUHR0tSapatapsNpuGDBkiSVq7dq06deqkoKAgVatWTXfeeacOHTrk2FfBe7RixQpFR0fLz89Pt9xyi7744gunY/73v/9VVFSU/Pz8VLVqVcXExOjcuXOSJGOMZs2apfDwcFWqVEm33HKL3n///au+n0CZc71+YjMAFBg8eLDp169fka+tXbvWBAYGmsWLF5tDhw6ZTz75xISGhpr4+HjHnLlz55pPP/3UHD582GzcuNHcfPPN5vHHHzfGGJOdnW3mzZtnAgMDTXp6uklPTzc///yzMcYYSebf//630/GqVKlikpKSjDHGHDlyxEgyoaGhZvny5ebw4cPm2LFj5pVXXjF16tRxjC1fvtwEBwebxYsXF9tjVFSUefLJJx3PGzRoYIKDg82iRYvMd999Zx5//HFTuXJl06tXL/Puu++aAwcOmP79+5smTZqY/Px8Y4wxSUlJxsfHx7Rp08Zs3brV7Nixw7Rr18506NDBsd8VK1YYHx8fs3DhQnPgwAHzwgsvmAoVKphPP/
3UMUeSqVmzpnn99dfNoUOHTGpqqlm+fLmRZA4cOGDS09NNRkaGMcaY999/3yxfvtx89913ZteuXaZv376mRYsWJi8vz+k9ioiIMB999JE5cOCAueeee0yDBg1MTk6OMcaYXbt2Gbvdbh5//HGTnJxsvv32WzN//nxz+vRpY4wxf/vb30xERIRZu3atOXTokElKSjJ2u91s3ry52PcTKGsIVACuu8GDB5sKFSoYf39/x+Oee+4xxhjTuXNnM336dKf5S5YsMXXq1Cl2f++++66pVq2a43lSUpKpUqVKoXklDVTz5s1zmhMSEmLeeustp7HnnnvOtG/fvtiaigpUgwYNcjxPT083ksykSZMcY1988YWRZNLT0x19SDJffvmlY05KSoqRZL766itjjDEdOnQwjz76qNOxBwwYYPr06ePU9+jRo53mbNq0yUgy586dK7YHY4w5deqUkWT27NljjPm/9+i1115zzNm7d6+RZFJSUowxxjzwwAOmY8eORe7vwoULxtfX12zdutVp/JFHHjEPPPDAVWsByhLWUAG4IaKjo5WYmOh47u/vL0n6+uuvtX37dk2bNs3xWl5enn799VdlZWXJz89PmzZt0vTp07Vv3z5lZmYqNzdXv/76qy5evOjYjxVt2rRx/Pr06dM6evSoHnnkET366KOO8dzcXFWpUuWa9hsZGen4da1atSRJLVq0KDR26tQp1a5dW5Lk7e3tVE9ERISCgoKUkpKidu3aKSUlRf/v//0/p+N07NhRL730UrE9Xc2hQ4c0adIkffnll/rpp5+Un58vSUpLS1Pz5s2L7KVOnTqOuiMiIpScnKwBAwYUuf99+/bp119/VY8ePZzGL126pFatWpWoRqAsIFABuCH8/f3VqFGjQuP5+fmaMmWK7rrrrkKv+fr66ocfflCfPn00fPhwPffccwoODtbnn3+uRx555HcXW9tsNhljnMaK2ua3oawgULz66qu69dZbneYVrPkqqd8ubrfZbMWOFRzzyvHixq583RhTaKykQbNv374KCQnRq6++qrp16yo/P1/NmzfXpUuXfreXgrorVapU7P4L5qxevVr16tVzes1ut5eoRqAsIFAB8Kg///nPOnDgQJFhS5J27Nih3NxcvfDCC/Lyuvw5mnfffddpTsWKFZWXl1do2xo1aig9Pd3x/Pvvv1dWVtZV66lVq5bq1aunw4cPa+DAgdfajmW5ubnasWOH2rVrJ0k6cOCAMjIyFBERIUlq0qSJPv/8c8XGxjq22bp1q5o0aXLV/VasWFGSnN6nM2fOKCUlRf/617/UuXNnSdLnn39+zTVHRkZq48aNmjJlSqHXmjZtKrvdrrS0NEVFRV3zvoGygkAFwKMmT56sO++8UyEhIRowYIC8vLy0e/du7dmzR88//7waNmyo3NxczZ8/X3379tV///tfLVq0yGkfoaGhunDhgjZu3KhbbrlFfn5+8vPzU7du3bRgwQLddtttys/P1/jx40v0lQjx8fF64oknFBgYqN69eys7O1s7duzQuXPnNHbs2Ov1Vki6fCVo1KhRevnll+Xj46ORI0fqtttucwSsp59+Wvfee6/+/Oc/6/bbb9eHH36oFStWaMOGDVfdb4MGDWSz2fTRRx+pT58+qlSpkqpWrapq1arplVdeUZ06dZSWlqYJEyZcc80TJ05UixYtFBcXp+HDh6tixYratGmTBgwYoOrVq+upp57SmDFjlJ+fr06dOikzM1Nbt25VQECABg8e7NL7BJQ6nl7EBaD8u9qn/Iy5/Em/Dh06mEqVKpnAwEDTrl0788orrzhef/HFF02dOnVMpUqVTExMjHnjjTcKLbAePny4qVatmpFknn32WWOMMceOHTM9e/Y0/v7+pnHjxmbNmjVFLkrftWtXoZqWLl1qWrZsaSpWrGiqVq1qunTpYlasWFFsD0UtSp87d67THF2xSP7K4xcsrl++fLkJDw83FStWNN26dTOpqalO+0lISDDh4eHGx8
fH3HTTTeaNN9646nEKTJ061dSuXdvYbDYzePBgY4wx69evN02aNDF2u91ERkaazZs3O21f1Ht07tw5I8ls2rTJMbZ582bToUMHY7fbTVBQkImJiXGcn/z8fPPSSy+Zm2++2fj4+JgaNWqYmJgYs2XLlmLfT6CssRlzxQIDAIBHLF68WKNHj+bbzIEyiC/2BAAAsIhABQAAYBG3/AAAACziChUAAIBFBCoAAACLCFQAAAAWEagAAAAsIlABAABYRKACAACwiEAFAABgEYEKAADAov8PFiZSdfj11OgAAAAASUVORK5CYII=", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "import matplotlib.pyplot as plt\n", + "\n", + "plt.figure(figsize=(6,5))\n", + "plt.barh(list(oos_gains_harmful.keys()), list(oos_gains_harmful.values()))\n", + "plt.xlabel('Feature importance')\n", + "plt.ylabel('Features')\n", + "plt.title('Feature importance')\n", + "plt.grid(axis='both')\n", + "plt.show()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.9" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +}