# Source code for longling.ML.toolkit.formatter.EvalFMT

# coding: utf-8
# 2020/4/13 @ tongshiwei

# incubating

import logging
import json
from longling.lib.stream import as_out_io, PATH_IO_TYPE
from collections import OrderedDict
from longling.lib.formatter import table_format, series_format
from longling.lib.candylib import as_list
import warnings

# Public names of this module (what ``from <module> import *`` exports).
__all__ = ["eval_format", "EvalFMT", "EpochEvalFMT", "EpisodeEvalFMT", "result_format"]


def _to_dict(name_value: (dict, tuple)) -> dict:
    """Make sure the name_value a dict object"""
    return dict(name_value) if isinstance(name_value, tuple) else name_value


class EvalFMT(object):
    """
    Formatter for evaluation results.

    Builds a human-readable message, and a flat ``dict`` holding the same
    values, from an iteration number, the training time, loss values,
    evaluation metrics and extra information. The message can be sent to a
    logger and the data appended to a dump file as one JSON line.

    Parameters
    ----------
    logger:
        Logger used when a result is dumped. Defaults to the root logger.
    dump_file:
        When not ``False``, dumped results are written to ``dump_file``.
    col: int
        Number of metrics placed on each line (passed to ``result_format``).
    kwargs:
        Accepted for compatibility with extended signatures; not used here.

    Examples
    --------
    >>> import numpy as np
    >>> from longling.ML.metrics import classification_report
    >>> y_true = np.array([0, 0, 1, 1, 2, 1])
    >>> y_pred = np.array([2, 1, 0, 1, 1, 0])
    >>> y_score = np.array([
    ...     [0.15, 0.4, 0.45],
    ...     [0.1, 0.9, 0.0],
    ...     [0.33333, 0.333333, 0.333333],
    ...     [0.15, 0.4, 0.45],
    ...     [0.1, 0.9, 0.0],
    ...     [0.33333, 0.333333, 0.333333]
    ... ])
    >>> print(EvalFMT.format(
    ...     iteration=30,
    ...     eval_name_value=classification_report(y_true, y_pred, y_score)
    ... ))    # doctest: +NORMALIZE_WHITESPACE
    Iteration [30]
               precision    recall        f1  support
    0           0.000000  0.000000  0.000000        2
    1           0.333333  0.333333  0.333333        3
    2           0.000000  0.000000  0.000000        1
    macro_avg   0.111111  0.111111  0.111111        6
    accuracy: 0.166667	macro_auc: 0.194444
    """

    def __init__(self, logger=logging.getLogger(), dump_file: "PATH_IO_TYPE | None | bool" = False,
                 col: "int | None" = None, **kwargs):
        """
        Store the logger, the dump target and the column setting.

        Parameters
        ----------
        logger:
            Logger used by ``__call__`` when dumping.
        dump_file:
            Dump target; ``False`` disables file dumping.
        col:
            Column setting later handed to ``result_format``.
        kwargs:
            Ignored.
        """
        self.logger = logger
        if dump_file is not False:
            # clean file: open the target once and write nothing
            # (presumably as_out_io's default mode truncates -- TODO confirm)
            with as_out_io(dump_file):
                pass
        self.log_f = dump_file
        self.col = col

    @classmethod
    def _loss_format(cls, name, value):
        """Return ``"<name>: <value>"`` for a single loss entry."""
        return "%s: %s" % (name, value)

    def loss_format(self, loss_name_value):
        """
        Format every loss entry.

        Returns
        -------
        tuple
            ``(message, loss_name_value)`` where ``message`` joins one
            ``"Loss - <name>: <value>"`` item per entry with single spaces and
            ``loss_name_value`` is the input mapping itself.
        """
        msg = [
            "Loss - " + self._loss_format(name, value)
            for name, value in loss_name_value.items()
        ]
        return " ".join(msg), loss_name_value

    @classmethod
    def format(cls, tips: str = None,
               iteration: int = None, train_time: float = None, loss_name_value: dict = None,
               eval_name_value: dict = None,
               extra_info: "dict | tuple | None" = None, keep: "set | str | None" = "msg",
               logger=logging.getLogger(), dump_file: "PATH_IO_TYPE | None | bool" = False,
               col: "int | None" = None,
               *args, **kwargs):
        """
        Build a temporary formatter and format one result with it.

        The formatter is created from ``logger``, ``dump_file`` and ``col``;
        every other argument is forwarded to ``__call__``. Dumping is enabled
        only when ``dump_file`` is not ``False``.

        Returns
        -------
        Whatever ``__call__`` returns for the given ``keep``.
        """
        formatter = cls(logger=logger, dump_file=dump_file, col=col)
        # Positional extras are unpacked before the keywords; this is the same
        # binding as the original call, written in the conventional order.
        return formatter(
            *args,
            tips=tips, iteration=iteration, train_time=train_time,
            loss_name_value=loss_name_value,
            eval_name_value=eval_name_value, extra_info=extra_info,
            dump=dump_file is not False, keep=keep, **kwargs
        )

    @property
    def iteration_name(self):
        """Label of the iteration counter, also used as its key in the data dict."""
        return "Iteration"

    @property
    def iteration_fmt(self):
        """Format string for the iteration counter, e.g. ``"Iteration [{:d}]"``."""
        return self.iteration_name + " [{:d}]"

    def __call__(self, tips: str = None,
                 iteration: int = None, train_time: float = None, loss_name_value: dict = None,
                 eval_name_value: dict = None,
                 extra_info: "dict | tuple | None" = None,
                 dump: bool = True, keep: "set | str | None" = "data", *args, **kwargs):
        """
        Format one evaluation result and optionally dump it.

        Parameters
        ----------
        tips:
            Free text placed at the start of the message.
        iteration:
            Iteration number; stored in the data under ``iteration_name``.
        train_time:
            Training time in seconds; stored in the data under ``'train_time'``.
        loss_name_value:
            Loss values, as a dict or a tuple convertible to one.
        eval_name_value:
            Evaluation metrics, as a dict or a tuple convertible to one;
            rendered with ``result_format`` on the lines after the header.
        extra_info:
            Extra values, as a dict or a tuple convertible to one; its
            ``str`` form is added to the message.
        dump:
            When true, log the message and, unless the dump target is
            ``False``, append the data to it as a JSON line.
        keep:
            Selects the return value: ``None`` returns the message; a string
            or a collection containing ``"msg"`` and/or ``"data"`` returns the
            message, the data, or ``(message, data)``.
        kwargs:
            ``logger`` and ``log_f`` override the instance's logger and dump
            target for this call.

        Returns
        -------
        str, dict, tuple or None
            As selected by ``keep``; ``None`` when ``keep`` names neither
            ``"msg"`` nor ``"data"``.

        Raises
        ------
        AssertionError
            If ``loss_name_value``, ``extra_info`` or ``eval_name_value`` is
            neither ``None``, a dict nor a tuple.
        """
        msg = []
        data = {}

        if tips is not None:
            msg.append("%s" % tips)

        if iteration is not None:
            msg.append(self.iteration_fmt.format(iteration))
            data[self.iteration_name] = iteration

        if train_time is not None:
            msg.append("Train Time-%.3fs" % train_time)
            data['train_time'] = train_time

        if loss_name_value is not None:
            loss_name_value = _to_dict(loss_name_value)
            assert isinstance(
                loss_name_value, dict
            ), "loss_name_value should be None, dict or tuple, " \
               "now is %s" % type(loss_name_value)
            _msg, _data = self.loss_format(loss_name_value)
            msg.append(_msg)
            data.update(_data)

        if extra_info is not None:
            extra_info = _to_dict(extra_info)
            assert isinstance(
                extra_info, dict
            ), "extra_info should be None, dict or tuple, " \
               "now is %s" % type(extra_info)
            msg.append(str(extra_info))
            data.update(extra_info)

        # Header line: all non-empty parts gathered so far, tab separated.
        msg = ["\t".join([m for m in msg if m])]

        if eval_name_value is not None:
            eval_name_value = _to_dict(eval_name_value)
            assert isinstance(
                eval_name_value, dict
            ), "eval_name_value should be None, dict or tuple, " \
               "now is %s" % type(eval_name_value)
            msg.append(result_format(eval_name_value, col=self.col))
            data.update(eval_name_value)

        # Drop an empty header or an empty metrics part before joining.
        msg = "\n".join([m for m in msg if m])

        if dump:
            logger = kwargs.get('logger', self.logger)
            logger.info("\n" + msg)
            log_f = kwargs.get('log_f', self.log_f)
            if log_f is not False:
                # Best effort: a failed dump only warns, the result is still returned.
                try:
                    with as_out_io(log_f, "a") as wf:
                        print(json.dumps(data, ensure_ascii=False), file=wf)
                except Exception as e:  # pragma: no cover
                    warnings.warn("Result dumping to file aborted: %s" % str(e))

        if keep is None:
            return msg
        elif isinstance(keep, str):
            keep = set(as_list(keep))

        if "msg" in keep and "data" in keep:
            return msg, data
        elif "msg" in keep:
            return msg
        elif "data" in keep:
            return data
        return None


def result_format(data: dict, col=None):
    """
    Render evaluation results as text.

    Entries whose value is a ``dict`` are gathered and rendered together by
    ``table_format``; all remaining entries are rendered by ``series_format``.
    The table part, when present, comes first and the two parts are joined
    with a newline.

    Parameters
    ----------
    data
        Mapping from a name to either a nested dict or any other value.
    col
        Passed to ``series_format`` as ``col``.

    Returns
    -------
    str
        The formatted text; an empty string when ``data`` is empty.

    Examples
    --------
    >>> print(result_format({"a": 1, "b": 2}))    # doctest: +NORMALIZE_WHITESPACE
    a: 1	b: 2
    >>> print(result_format({"a": 1, "b": {"1": 0.1, "2": 0.3}, "c": {"1": 0.4, "2": 0.0}}))
         1    2
    b  0.1  0.3
    c  0.4  0.0
    a: 1
    """
    table = OrderedDict()
    series = OrderedDict()
    for key, value in data.items():
        # Nested dicts become table rows, everything else a "name: value" item.
        if isinstance(value, dict):
            table[key] = value
        else:
            series[key] = value

    _ret = []
    if table:
        _ret.append(table_format(table))
    if series:
        _ret.append(series_format(series, col=col))

    return "\n".join(_ret)


# Module-level shortcut: calling ``eval_format(...)`` is calling ``EvalFMT.format(...)``.
eval_format = EvalFMT.format


class EpochEvalFMT(EvalFMT):
    """
    ``EvalFMT`` variant that labels the iteration counter ``Epoch``.

    Examples
    --------
    >>> import numpy as np
    >>> from longling.ML.metrics import classification_report
    >>> y_true = np.array([0, 0, 1, 1, 2, 1])
    >>> y_pred = np.array([2, 1, 0, 1, 1, 0])
    >>> y_score = np.array([
    ...     [0.15, 0.4, 0.45],
    ...     [0.1, 0.9, 0.0],
    ...     [0.33333, 0.333333, 0.333333],
    ...     [0.15, 0.4, 0.45],
    ...     [0.1, 0.9, 0.0],
    ...     [0.33333, 0.333333, 0.333333]
    ... ])
    >>> print(EpochEvalFMT.format(
    ...     iteration=30,
    ...     eval_name_value=classification_report(y_true, y_pred, y_score)
    ... ))    # doctest: +NORMALIZE_WHITESPACE
    Epoch [30]
               precision    recall        f1  support
    0           0.000000  0.000000  0.000000        2
    1           0.333333  0.333333  0.333333        3
    2           0.000000  0.000000  0.000000        1
    macro_avg   0.111111  0.111111  0.111111        6
    accuracy: 0.166667	macro_auc: 0.194444
    """

    @property
    def iteration_name(self):
        """Label of the iteration counter: ``"Epoch"``."""
        return "Epoch"


class EpisodeEvalFMT(EvalFMT):
    """
    ``EvalFMT`` variant that labels the iteration counter ``Episode``.

    Examples
    --------
    >>> import numpy as np
    >>> from longling.ML.metrics import classification_report
    >>> y_true = np.array([0, 0, 1, 1, 2, 1])
    >>> y_pred = np.array([2, 1, 0, 1, 1, 0])
    >>> y_score = np.array([
    ...     [0.15, 0.4, 0.45],
    ...     [0.1, 0.9, 0.0],
    ...     [0.33333, 0.333333, 0.333333],
    ...     [0.15, 0.4, 0.45],
    ...     [0.1, 0.9, 0.0],
    ...     [0.33333, 0.333333, 0.333333]
    ... ])
    >>> print(EpisodeEvalFMT.format(
    ...     iteration=30,
    ...     eval_name_value=classification_report(y_true, y_pred, y_score)
    ... ))    # doctest: +NORMALIZE_WHITESPACE
    Episode [30]
               precision    recall        f1  support
    0           0.000000  0.000000  0.000000        2
    1           0.333333  0.333333  0.333333        3
    2           0.000000  0.000000  0.000000        1
    macro_avg   0.111111  0.111111  0.111111        6
    accuracy: 0.166667	macro_auc: 0.194444
    """

    @property
    def iteration_name(self):
        """Label of the iteration counter: ``"Episode"``."""
        return "Episode"