Skip to content

Commit b7ccdaf

Browse files
authored
[python] Allow to register custom logger in Python-package (#3820)
* centralize Python-package logging in one place * continue * fix test name * removed unused import * enhance test * fix lint * hotfix test * workaround for GPU test * remove custom logger from Dask-package * replace one log func with flags by multiple funcs
1 parent ac706e1 commit b7ccdaf

File tree

9 files changed

+203
-53
lines changed

9 files changed

+203
-53
lines changed

docs/Python-API.rst

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,3 +55,11 @@ Plotting
5555
plot_metric
5656
plot_tree
5757
create_tree_digraph
58+
59+
Utilities
60+
---------
61+
62+
.. autosummary::
63+
:toctree: pythonapi/
64+
65+
register_logger

python-package/lightgbm/__init__.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
44
Contributors: https://github.com/microsoft/LightGBM/graphs/contributors.
55
"""
6-
from .basic import Booster, Dataset
6+
from .basic import Booster, Dataset, register_logger
77
from .callback import (early_stopping, print_evaluation, record_evaluation,
88
reset_parameter)
99
from .engine import cv, train, CVBooster
@@ -28,6 +28,7 @@
2828
__version__ = version_file.read().strip()
2929

3030
__all__ = ['Dataset', 'Booster', 'CVBooster',
31+
'register_logger',
3132
'train', 'cv',
3233
'LGBMModel', 'LGBMRegressor', 'LGBMClassifier', 'LGBMRanker',
3334
'print_evaluation', 'record_evaluation', 'reset_parameter', 'early_stopping',

python-package/lightgbm/basic.py

Lines changed: 78 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,10 @@
55
import json
66
import os
77
import warnings
8-
from tempfile import NamedTemporaryFile
98
from collections import OrderedDict
9+
from functools import wraps
10+
from logging import Logger
11+
from tempfile import NamedTemporaryFile
1012

1113
import numpy as np
1214
import scipy.sparse
@@ -15,9 +17,64 @@
1517
from .libpath import find_lib_path
1618

1719

20+
class _DummyLogger:
21+
def info(self, msg):
22+
print(msg)
23+
24+
def warning(self, msg):
25+
warnings.warn(msg, stacklevel=3)
26+
27+
28+
_LOGGER = _DummyLogger()
29+
30+
31+
def register_logger(logger):
    """Register custom logger.

    Parameters
    ----------
    logger : logging.Logger
        Custom logger.

    Raises
    ------
    TypeError
        If ``logger`` is not an instance of ``logging.Logger``.
    """
    global _LOGGER
    if isinstance(logger, Logger):
        _LOGGER = logger
    else:
        raise TypeError("Logger should inherit logging.Logger class")
43+
44+
45+
def _normalize_native_string(func):
46+
"""Join log messages from native library which come by chunks."""
47+
msg_normalized = []
48+
49+
@wraps(func)
50+
def wrapper(msg):
51+
nonlocal msg_normalized
52+
if msg.strip() == '':
53+
msg = ''.join(msg_normalized)
54+
msg_normalized = []
55+
return func(msg)
56+
else:
57+
msg_normalized.append(msg)
58+
59+
return wrapper
60+
61+
62+
def _log_info(msg):
    """Forward an info-level message to the currently registered logger."""
    _LOGGER.info(msg)
64+
65+
66+
def _log_warning(msg):
    """Forward a warning-level message to the currently registered logger."""
    _LOGGER.warning(msg)
68+
69+
70+
@_normalize_native_string
def _log_native(msg):
    """Log a message coming from the native library.

    Chunked native output is first re-joined into whole lines by the
    ``_normalize_native_string`` decorator, then emitted at info level.
    """
    _LOGGER.info(msg)
73+
74+
1875
def _log_callback(msg):
    """Redirect logs from native library into Python.

    Registered as the C callback; ``msg`` arrives as UTF-8 bytes.
    """
    decoded = msg.decode('utf-8')
    _log_native("{0:s}".format(decoded))
2178

2279

2380
def _load_lib():
@@ -329,8 +386,8 @@ def convert_from_sliced_object(data):
329386
"""Fix the memory of multi-dimensional sliced object."""
330387
if isinstance(data, np.ndarray) and isinstance(data.base, np.ndarray):
331388
if not data.flags.c_contiguous:
332-
warnings.warn("Usage of np.ndarray subset (sliced data) is not recommended "
333-
"due to it will double the peak memory cost in LightGBM.")
389+
_log_warning("Usage of np.ndarray subset (sliced data) is not recommended "
390+
"due to it will double the peak memory cost in LightGBM.")
334391
return np.copy(data)
335392
return data
336393

@@ -620,7 +677,7 @@ def predict(self, data, start_iteration=0, num_iteration=-1,
620677
preds, nrow = self.__pred_for_np2d(data.to_numpy(), start_iteration, num_iteration, predict_type)
621678
else:
622679
try:
623-
warnings.warn('Converting data to scipy sparse matrix.')
680+
_log_warning('Converting data to scipy sparse matrix.')
624681
csr = scipy.sparse.csr_matrix(data)
625682
except BaseException:
626683
raise TypeError('Cannot predict data for type {}'.format(type(data).__name__))
@@ -1103,9 +1160,9 @@ def _lazy_init(self, data, label=None, reference=None,
11031160
.co_varnames[:getattr(self.__class__, '_lazy_init').__code__.co_argcount])
11041161
for key, _ in params.items():
11051162
if key in args_names:
1106-
warnings.warn('{0} keyword has been found in `params` and will be ignored.\n'
1107-
'Please use {0} argument of the Dataset constructor to pass this parameter.'
1108-
.format(key))
1163+
_log_warning('{0} keyword has been found in `params` and will be ignored.\n'
1164+
'Please use {0} argument of the Dataset constructor to pass this parameter.'
1165+
.format(key))
11091166
# user can set verbose with params, it has higher priority
11101167
if not any(verbose_alias in params for verbose_alias in _ConfigAliases.get("verbosity")) and silent:
11111168
params["verbose"] = -1
@@ -1126,7 +1183,7 @@ def _lazy_init(self, data, label=None, reference=None,
11261183
if categorical_indices:
11271184
for cat_alias in _ConfigAliases.get("categorical_feature"):
11281185
if cat_alias in params:
1129-
warnings.warn('{} in param dict is overridden.'.format(cat_alias))
1186+
_log_warning('{} in param dict is overridden.'.format(cat_alias))
11301187
params.pop(cat_alias, None)
11311188
params['categorical_column'] = sorted(categorical_indices)
11321189

@@ -1172,7 +1229,7 @@ def _lazy_init(self, data, label=None, reference=None,
11721229
self.set_group(group)
11731230
if isinstance(predictor, _InnerPredictor):
11741231
if self._predictor is None and init_score is not None:
1175-
warnings.warn("The init_score will be overridden by the prediction of init_model.")
1232+
_log_warning("The init_score will be overridden by the prediction of init_model.")
11761233
self._set_init_score_by_predictor(predictor, data)
11771234
elif init_score is not None:
11781235
self.set_init_score(init_score)
@@ -1314,7 +1371,7 @@ def construct(self):
13141371
if self.reference is not None:
13151372
reference_params = self.reference.get_params()
13161373
if self.get_params() != reference_params:
1317-
warnings.warn('Overriding the parameters from Reference Dataset.')
1374+
_log_warning('Overriding the parameters from Reference Dataset.')
13181375
self._update_params(reference_params)
13191376
if self.used_indices is None:
13201377
# create valid
@@ -1583,11 +1640,11 @@ def set_categorical_feature(self, categorical_feature):
15831640
self.categorical_feature = categorical_feature
15841641
return self._free_handle()
15851642
elif categorical_feature == 'auto':
1586-
warnings.warn('Using categorical_feature in Dataset.')
1643+
_log_warning('Using categorical_feature in Dataset.')
15871644
return self
15881645
else:
1589-
warnings.warn('categorical_feature in Dataset is overridden.\n'
1590-
'New categorical_feature is {}'.format(sorted(list(categorical_feature))))
1646+
_log_warning('categorical_feature in Dataset is overridden.\n'
1647+
'New categorical_feature is {}'.format(sorted(list(categorical_feature))))
15911648
self.categorical_feature = categorical_feature
15921649
return self._free_handle()
15931650
else:
@@ -1840,8 +1897,8 @@ def get_data(self):
18401897
elif isinstance(self.data, DataTable):
18411898
self.data = self.data[self.used_indices, :]
18421899
else:
1843-
warnings.warn("Cannot subset {} type of raw data.\n"
1844-
"Returning original raw data".format(type(self.data).__name__))
1900+
_log_warning("Cannot subset {} type of raw data.\n"
1901+
"Returning original raw data".format(type(self.data).__name__))
18451902
self.need_slice = False
18461903
if self.data is None:
18471904
raise LightGBMError("Cannot call `get_data` after freed raw data, "
@@ -2011,10 +2068,10 @@ def add_features_from(self, other):
20112068
old_self_data_type)
20122069
err_msg += ("Set free_raw_data=False when construct Dataset to avoid this"
20132070
if was_none else "Freeing raw data")
2014-
warnings.warn(err_msg)
2071+
_log_warning(err_msg)
20152072
self.feature_name = self.get_feature_name()
2016-
warnings.warn("Reseting categorical features.\n"
2017-
"You can set new categorical features via ``set_categorical_feature`` method")
2073+
_log_warning("Reseting categorical features.\n"
2074+
"You can set new categorical features via ``set_categorical_feature`` method")
20182075
self.categorical_feature = "auto"
20192076
self.pandas_categorical = None
20202077
return self
@@ -2834,7 +2891,7 @@ def model_from_string(self, model_str, verbose=True):
28342891
self.handle,
28352892
ctypes.byref(out_num_class)))
28362893
if verbose:
2837-
print('Finished loading model, total used %d iterations' % int(out_num_iterations.value))
2894+
_log_info('Finished loading model, total used %d iterations' % int(out_num_iterations.value))
28382895
self.__num_class = out_num_class.value
28392896
self.pandas_categorical = _load_pandas_categorical(model_str=model_str)
28402897
return self

python-package/lightgbm/callback.py

Lines changed: 8 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,9 @@
11
# coding: utf-8
22
"""Callbacks library."""
33
import collections
4-
import warnings
54
from operator import gt, lt
65

7-
from .basic import _ConfigAliases
6+
from .basic import _ConfigAliases, _log_info, _log_warning
87

98

109
class EarlyStopException(Exception):
@@ -67,7 +66,7 @@ def print_evaluation(period=1, show_stdv=True):
6766
def _callback(env):
6867
if period > 0 and env.evaluation_result_list and (env.iteration + 1) % period == 0:
6968
result = '\t'.join([_format_eval_result(x, show_stdv) for x in env.evaluation_result_list])
70-
print('[%d]\t%s' % (env.iteration + 1, result))
69+
_log_info('[%d]\t%s' % (env.iteration + 1, result))
7170
_callback.order = 10
7271
return _callback
7372

@@ -180,15 +179,14 @@ def _init(env):
180179
enabled[0] = not any(env.params.get(boost_alias, "") == 'dart' for boost_alias
181180
in _ConfigAliases.get("boosting"))
182181
if not enabled[0]:
183-
warnings.warn('Early stopping is not available in dart mode')
182+
_log_warning('Early stopping is not available in dart mode')
184183
return
185184
if not env.evaluation_result_list:
186185
raise ValueError('For early stopping, '
187186
'at least one dataset and eval metric is required for evaluation')
188187

189188
if verbose:
190-
msg = "Training until validation scores don't improve for {} rounds"
191-
print(msg.format(stopping_rounds))
189+
_log_info("Training until validation scores don't improve for {} rounds".format(stopping_rounds))
192190

193191
# split is needed for "<dataset type> <metric>" case (e.g. "train l1")
194192
first_metric[0] = env.evaluation_result_list[0][1].split(" ")[-1]
@@ -205,10 +203,10 @@ def _init(env):
205203
def _final_iteration_check(env, eval_name_splitted, i):
206204
if env.iteration == env.end_iteration - 1:
207205
if verbose:
208-
print('Did not meet early stopping. Best iteration is:\n[%d]\t%s' % (
206+
_log_info('Did not meet early stopping. Best iteration is:\n[%d]\t%s' % (
209207
best_iter[i] + 1, '\t'.join([_format_eval_result(x) for x in best_score_list[i]])))
210208
if first_metric_only:
211-
print("Evaluated only: {}".format(eval_name_splitted[-1]))
209+
_log_info("Evaluated only: {}".format(eval_name_splitted[-1]))
212210
raise EarlyStopException(best_iter[i], best_score_list[i])
213211

214212
def _callback(env):
@@ -232,10 +230,10 @@ def _callback(env):
232230
continue # train data for lgb.cv or sklearn wrapper (underlying lgb.train)
233231
elif env.iteration - best_iter[i] >= stopping_rounds:
234232
if verbose:
235-
print('Early stopping, best iteration is:\n[%d]\t%s' % (
233+
_log_info('Early stopping, best iteration is:\n[%d]\t%s' % (
236234
best_iter[i] + 1, '\t'.join([_format_eval_result(x) for x in best_score_list[i]])))
237235
if first_metric_only:
238-
print("Evaluated only: {}".format(eval_name_splitted[-1]))
236+
_log_info("Evaluated only: {}".format(eval_name_splitted[-1]))
239237
raise EarlyStopException(best_iter[i], best_score_list[i])
240238
_final_iteration_check(env, eval_name_splitted, i)
241239
_callback.order = 30

python-package/lightgbm/dask.py

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,6 @@
66
77
It is based on dask-lightgbm, which was based on dask-xgboost.
88
"""
9-
import logging
109
import socket
1110
from collections import defaultdict
1211
from copy import deepcopy
@@ -22,11 +21,9 @@
2221
from dask import delayed
2322
from dask.distributed import Client, default_client, get_worker, wait
2423

25-
from .basic import _ConfigAliases, _LIB, _safe_call
24+
from .basic import _ConfigAliases, _LIB, _log_warning, _safe_call
2625
from .sklearn import LGBMClassifier, LGBMRegressor, LGBMRanker
2726

28-
logger = logging.getLogger(__name__)
29-
3027

3128
def _find_open_port(worker_ip: str, local_listen_port: int, ports_to_skip: Iterable[int]) -> int:
3229
"""Find an open port.
@@ -257,10 +254,10 @@ def _train(client, data, label, params, model_factory, sample_weight=None, group
257254
'voting_parallel'
258255
}
259256
if tree_learner is None:
260-
logger.warning('Parameter tree_learner not set. Using "data" as default')
257+
_log_warning('Parameter tree_learner not set. Using "data" as default')
261258
params['tree_learner'] = 'data'
262259
elif tree_learner.lower() not in allowed_tree_learners:
263-
logger.warning('Parameter tree_learner set to %s, which is not allowed. Using "data" as default' % tree_learner)
260+
_log_warning('Parameter tree_learner set to %s, which is not allowed. Using "data" as default' % tree_learner)
264261
params['tree_learner'] = 'data'
265262

266263
local_listen_port = 12400

python-package/lightgbm/engine.py

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -2,13 +2,12 @@
22
"""Library with training routines of LightGBM."""
33
import collections
44
import copy
5-
import warnings
65
from operator import attrgetter
76

87
import numpy as np
98

109
from . import callback
11-
from .basic import Booster, Dataset, LightGBMError, _ConfigAliases, _InnerPredictor
10+
from .basic import Booster, Dataset, LightGBMError, _ConfigAliases, _InnerPredictor, _log_warning
1211
from .compat import SKLEARN_INSTALLED, _LGBMGroupKFold, _LGBMStratifiedKFold
1312

1413

@@ -146,12 +145,12 @@ def train(params, train_set, num_boost_round=100,
146145
for alias in _ConfigAliases.get("num_iterations"):
147146
if alias in params:
148147
num_boost_round = params.pop(alias)
149-
warnings.warn("Found `{}` in params. Will use it instead of argument".format(alias))
148+
_log_warning("Found `{}` in params. Will use it instead of argument".format(alias))
150149
params["num_iterations"] = num_boost_round
151150
for alias in _ConfigAliases.get("early_stopping_round"):
152151
if alias in params:
153152
early_stopping_rounds = params.pop(alias)
154-
warnings.warn("Found `{}` in params. Will use it instead of argument".format(alias))
153+
_log_warning("Found `{}` in params. Will use it instead of argument".format(alias))
155154
params["early_stopping_round"] = early_stopping_rounds
156155
first_metric_only = params.get('first_metric_only', False)
157156

@@ -525,12 +524,12 @@ def cv(params, train_set, num_boost_round=100,
525524
params['objective'] = 'none'
526525
for alias in _ConfigAliases.get("num_iterations"):
527526
if alias in params:
528-
warnings.warn("Found `{}` in params. Will use it instead of argument".format(alias))
527+
_log_warning("Found `{}` in params. Will use it instead of argument".format(alias))
529528
num_boost_round = params.pop(alias)
530529
params["num_iterations"] = num_boost_round
531530
for alias in _ConfigAliases.get("early_stopping_round"):
532531
if alias in params:
533-
warnings.warn("Found `{}` in params. Will use it instead of argument".format(alias))
532+
_log_warning("Found `{}` in params. Will use it instead of argument".format(alias))
534533
early_stopping_rounds = params.pop(alias)
535534
params["early_stopping_round"] = early_stopping_rounds
536535
first_metric_only = params.get('first_metric_only', False)

python-package/lightgbm/plotting.py

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,11 @@
11
# coding: utf-8
22
"""Plotting library."""
3-
import warnings
43
from copy import deepcopy
54
from io import BytesIO
65

76
import numpy as np
87

9-
from .basic import Booster
8+
from .basic import Booster, _log_warning
109
from .compat import MATPLOTLIB_INSTALLED, GRAPHVIZ_INSTALLED
1110
from .sklearn import LGBMModel
1211

@@ -326,8 +325,7 @@ def plot_metric(booster, metric=None, dataset_names=None,
326325
num_metric = len(metrics_for_one)
327326
if metric is None:
328327
if num_metric > 1:
329-
msg = "More than one metric available, picking one to plot."
330-
warnings.warn(msg, stacklevel=2)
328+
_log_warning("More than one metric available, picking one to plot.")
331329
metric, results = metrics_for_one.popitem()
332330
else:
333331
if metric not in metrics_for_one:

0 commit comments

Comments
 (0)