longling.ML.metrics.ranking 源代码

# coding: utf-8
# 2021/6/21 @ tongshiwei
from tqdm import tqdm
from longling import as_list
from copy import deepcopy
from .utils import POrderedDict

__all__ = ["ranking_report"]


def ranking_auc(ranked_label):
    """
    Examples
    --------
    >>> ranking_auc([1, 1, 0, 0, 0])
    1.0
    >>> ranking_auc([0, 1, 0, 1, 0])
    0.5
    >>> ranking_auc([1, 0, 1, 0, 0])
    0.8333333333333334
    >>> ranking_auc([0, 0, 0, 1, 1])
    0.0
    """
    pos_num = sum(ranked_label)
    neg_num = len(ranked_label) - sum(ranked_label)
    if pos_num * neg_num == 0:  # pragma: no cover
        return 1
    return sum(
        [len(ranked_label[i + 1:]) - sum(ranked_label[i + 1:]) for i, score in enumerate(ranked_label) if score == 1]
    ) / (pos_num * neg_num)


[文档]def ranking_report(y_true, y_pred, k: (int, list) = None, continuous=False, coerce="ignore", pad_pred=-100,
                   metrics=None, bottom=False, verbose=True) -> POrderedDict:
    r"""

    Parameters
    ----------
    y_true
    y_pred
    k
    continuous
    coerce
    pad_pred
    metrics
    bottom
    verbose

    Returns
    -------

    Examples
    --------
    >>> y_true = [[1, 0, 0], [0, 0, 1]]
    >>> y_pred = [[0.75, 0.5, 1], [1, 0.2, 0.1]]
    >>> ranking_report(y_true, y_pred)  # doctest: +NORMALIZE_WHITESPACE
           ndcg@k  precision@k  recall@k  f1@k  len@k  support@k
    1   1.000000     0.000000       0.0   0.0    1.0          2
    3   0.565465     0.333333       1.0   0.5    3.0          2
    5   0.565465     0.333333       1.0   0.5    3.0          2
    10  0.565465     0.333333       1.0   0.5    3.0          2
    auc: 0.250000	map: 0.416667	mrr: 0.416667	coverage_error: 2.500000	ranking_loss: 0.750000	len: 3.000000
    support: 2
    >>> ranking_report(y_true, y_pred, k=[1, 3, 5])  # doctest: +NORMALIZE_WHITESPACE
           ndcg@k  precision@k  recall@k  f1@k  len@k  support@k
    1   1.000000     0.000000       0.0   0.0    1.0          2
    3   0.565465     0.333333       1.0   0.5    3.0          2
    5   0.565465     0.333333       1.0   0.5    3.0          2
    auc: 0.250000	map: 0.416667	mrr: 0.416667	coverage_error: 2.500000	ranking_loss: 0.750000	len: 3.000000
    support: 2
    >>> ranking_report(y_true, y_pred, bottom=True)  # doctest: +NORMALIZE_WHITESPACE
              ndcg@k  precision@k  recall@k  f1@k  len@k  support@k  ndcg@k(B) \
    1   1.000000     0.000000       0.0   0.0    1.0          2   1.000000
    3   0.565465     0.333333       1.0   0.5    3.0          2   0.806574
    5   0.565465     0.333333       1.0   0.5    3.0          2   0.806574
    10  0.565465     0.333333       1.0   0.5    3.0          2   0.806574
    <BLANKLINE>
        precision@k(B)  recall@k(B)   f1@k(B)  len@k(B)  support@k(B)
    1         0.500000         0.25  0.333333       1.0             2
    3         0.666667         1.00  0.800000       3.0             2
    5         0.666667         1.00  0.800000       3.0             2
    10        0.666667         1.00  0.800000       3.0             2
    auc: 0.250000	map: 0.416667	mrr: 0.416667	coverage_error: 2.500000	ranking_loss: 0.750000	len: 3.000000
    support: 2	map(B): 0.708333	mrr(B): 0.750000
    >>> ranking_report(y_true, y_pred, bottom=True, metrics=["auc"])  # doctest: +NORMALIZE_WHITESPACE
    auc: 0.250000   len: 3.000000	support: 2
    >>> y_true = [[0.9, 0.7, 0.1], [0, 0.5, 1]]
    >>> y_pred = [[0.75, 0.5, 1], [1, 0.2, 0.1]]
    >>> ranking_report(y_true, y_pred, continuous=True)  # doctest: +NORMALIZE_WHITESPACE
          ndcg@k  len@k  support@k
    3   0.675647    3.0          2
    5   0.675647    3.0          2
    10  0.675647    3.0          2
    mrr: 0.750000	len: 3.000000	support: 2
    >>> y_true = [[1, 0], [0, 0, 1]]
    >>> y_pred = [[0.75, 0.5], [1, 0.2, 0.1]]
    >>> ranking_report(y_true, y_pred)  # doctest: +NORMALIZE_WHITESPACE
        ndcg@k  precision@k  recall@k      f1@k  len@k  support@k
    1     1.00     0.500000       0.5  0.500000    1.0          2
    3     0.75     0.416667       1.0  0.583333    2.5          2
    5     0.75     0.416667       1.0  0.583333    2.5          2
    10    0.75     0.416667       1.0  0.583333    2.5          2
    auc: 0.500000	map: 0.666667	mrr: 0.666667	coverage_error: 2.000000	ranking_loss: 0.500000	len: 2.500000
    support: 2
    >>> ranking_report(y_true, y_pred, coerce="abandon")  # doctest: +NORMALIZE_WHITESPACE
       ndcg@k  precision@k  recall@k  f1@k  len@k  support@k
    1     1.0     0.500000       0.5   0.5    1.0          2
    3     0.5     0.333333       1.0   0.5    3.0          1
    auc: 0.500000	map: 0.666667	mrr: 0.666667	coverage_error: 2.000000	ranking_loss: 0.500000	len: 2.500000
    support: 2
    >>> ranking_report(y_true, y_pred, coerce="padding")  # doctest: +NORMALIZE_WHITESPACE
        ndcg@k  precision@k  recall@k      f1@k  len@k  support@k
    1     1.00     0.500000       0.5  0.500000    1.0          2
    3     0.75     0.416667       1.0  0.583333    2.5          2
    5     0.75     0.416667       1.0  0.583333    2.5          2
    10    0.75     0.416667       1.0  0.583333    2.5          2
    auc: 0.500000	map: 0.666667	mrr: 0.666667	coverage_error: 2.000000	ranking_loss: 0.500000	len: 2.500000
    support: 2
    >>> ranking_report(y_true, y_pred, bottom=True)  # doctest: +NORMALIZE_WHITESPACE
        ndcg@k  precision@k  recall@k      f1@k  len@k  support@k  ndcg@k(B)  \
    1     1.00     0.500000       0.5  0.500000    1.0          2   1.000000
    3     0.75     0.416667       1.0  0.583333    2.5          2   0.846713
    5     0.75     0.416667       1.0  0.583333    2.5          2   0.846713
    10    0.75     0.416667       1.0  0.583333    2.5          2   0.846713
    <BLANKLINE>
        precision@k(B)  recall@k(B)   f1@k(B)  len@k(B)  support@k(B)
    1         0.500000          0.5  0.500000       1.0             2
    3         0.583333          1.0  0.733333       2.5             2
    5         0.583333          1.0  0.733333       2.5             2
    10        0.583333          1.0  0.733333       2.5             2
    auc: 0.500000	map: 0.666667	mrr: 0.666667	coverage_error: 2.000000	ranking_loss: 0.500000	len: 2.500000
    support: 2	map(B): 0.791667	mrr(B): 0.750000
    >>> ranking_report(y_true, y_pred, bottom=True, coerce="abandon")  # doctest: +NORMALIZE_WHITESPACE
       ndcg@k  precision@k  recall@k  f1@k  len@k  support@k  ndcg@k(B)  \
    1     1.0     0.500000       0.5   0.5    1.0          2   1.000000
    3     0.5     0.333333       1.0   0.5    3.0          1   0.693426
    <BLANKLINE>
       precision@k(B)  recall@k(B)  f1@k(B)  len@k(B)  support@k(B)
    1        0.500000          0.5      0.5       1.0             2
    3        0.666667          1.0      0.8       3.0             1
    auc: 0.500000	map: 0.666667	mrr: 0.666667	coverage_error: 2.000000	ranking_loss: 0.500000
    len: 2.500000	support: 2	map(B): 0.791667	mrr(B): 0.750000
    >>> ranking_report(y_true, y_pred, bottom=True, coerce="padding")  # doctest: +NORMALIZE_WHITESPACE
        ndcg@k  precision@k  recall@k      f1@k  len@k  support@k  ndcg@k(B)  \
    1     1.00     0.500000       0.5  0.500000    1.0          2   1.000000
    3     0.75     0.416667       1.0  0.583333    2.5          2   0.846713
    5     0.75     0.416667       1.0  0.583333    2.5          2   0.846713
    10    0.75     0.416667       1.0  0.583333    2.5          2   0.846713
    <BLANKLINE>
        precision@k(B)  recall@k(B)   f1@k(B)  len@k(B)  support@k(B)
    1             0.50          0.5  0.500000       1.0             2
    3             0.50          1.0  0.650000       3.0             2
    5             0.30          1.0  0.452381       5.0             2
    10            0.15          1.0  0.257576      10.0             2
    auc: 0.500000	map: 0.666667	mrr: 0.666667	coverage_error: 2.000000	ranking_loss: 0.500000	len: 2.500000
    support: 2	map(B): 0.791667	mrr(B): 0.750000
    """
    import numpy as np
    from collections import OrderedDict
    from sklearn.metrics import (
        label_ranking_average_precision_score,
        ndcg_score, label_ranking_loss, coverage_error
    )
    assert coerce in {"ignore", "abandon", "raise", "padding"}
    if metrics is None:
        metrics = ["mrr", "ndcg"]
        if continuous is False:
            metrics.extend(["auc", "map", "coverage_error", "ranking_loss", "precision", "recall", "f1"])
    metrics = set(metrics)

    if k is not None:
        k = as_list(k)
    else:
        if continuous is True:
            k = [3, 5, 10]
        else:
            k = [1, 3, 5, 10]

    results = {
        "auc": [],
        "map": [],
        "mrr": [],
        "coverage_error": [],
        "ranking_loss": [],
        "len": [],
        "support": [],
    }
    if bottom:
        results.update({
            "map(B)": [],
            "mrr(B)": [],
        })
    k_results = {}
    for _k in k:
        k_results[_k] = {
            "ndcg@k": [],
            "precision@k": [],
            "recall@k": [],
            "f1@k": [],
            "len@k": [],
            "support@k": [],
        }
        if bottom:
            k_results[_k].update({
                "ndcg@k(B)": [],
                "precision@k(B)": [],
                "recall@k(B)": [],
                "f1@k(B)": [],
                "len@k(B)": [],
                "support@k(B)": [],
            })
    suffix = [""]
    if bottom:
        suffix += ["(B)"]

    for label, pred in tqdm(zip(y_true, y_pred), "ranking metrics", disable=not verbose):
        if continuous is False and "map" in metrics:
            results["map"].append(label_ranking_average_precision_score([label], [pred]))
            if bottom:
                results["map(B)"].append(label_ranking_average_precision_score(
                    [(1 - np.asarray(label)).tolist()],
                    [(-np.asarray(pred)).tolist()]
                ))

        if len(label) > 1 and continuous is False:
            if "coverage_error" in metrics:
                results["coverage_error"].append(coverage_error([label], [pred]))
            if "ranking_loss" in metrics:
                results["ranking_loss"].append(label_ranking_loss([label], [pred]))

        results["len"].append(len(label))
        results["support"].append(1)
        label_pred = list(sorted(zip(label, pred), key=lambda x: x[1], reverse=True))
        sorted_label = list(zip(*label_pred))[0]
        if "auc" in metrics:
            results["auc"].append(ranking_auc(sorted_label))
        if "mrr" in metrics:
            try:
                results["mrr"].append(1 / (np.asarray(sorted_label).nonzero()[0][0] + 1))
            except IndexError:  # pragma: no cover
                pass
            try:
                if bottom:
                    results["mrr(B)"].append(1 / (np.asarray(sorted_label[::-1]).nonzero()[0][0] + 1))
            except IndexError:  # pragma: no cover
                pass

        if metrics & {"ndcg", "precision", "recall", "f1"}:
            for _k in k:
                for _suffix in suffix:
                    if _suffix == "":
                        _label_pred = deepcopy(label_pred)
                        if len(_label_pred) < _k:
                            if coerce == "ignore":
                                pass
                            elif coerce == "abandon":
                                continue
                            elif coerce == "raise":
                                raise ValueError("Not enough value: %s vs target %s" % (len(_label_pred), _k))
                            elif coerce == "padding":  # pragma: no cover
                                _label_pred += [(0, pad_pred)] * (_k - len(_label_pred))
                        k_label_pred = label_pred[:_k]
                        total_label = sum(label)
                    else:
                        inv_label_pred = [(1 - _l, -p) for _l, p in label_pred][::-1]
                        if len(inv_label_pred) < _k:
                            if coerce == "ignore":
                                pass
                            elif coerce == "abandon":
                                continue
                            elif coerce == "raise":  # pragma: no cover
                                raise ValueError("Not enough value: %s vs target %s" % (len(inv_label_pred), _k))
                            elif coerce == "padding":
                                inv_label_pred += [(0, pad_pred)] * (_k - len(inv_label_pred))
                        k_label_pred = inv_label_pred[:_k]
                        total_label = len(label) - sum(label)

                    if not k_label_pred:  # pragma: no cover
                        continue
                    k_label, k_pred = list(zip(*k_label_pred))
                    if "ndcg" in metrics:
                        if len(k_label) == 1 and "ndcg" in metrics:
                            k_results[_k]["ndcg@k%s" % _suffix].append(1)
                        else:
                            k_results[_k]["ndcg@k%s" % _suffix].append(ndcg_score([k_label], [k_pred]))
                    p = sum(k_label) / len(k_label)
                    r = sum(k_label) / total_label if total_label else 0
                    if "precision" in metrics:
                        k_results[_k]["precision@k%s" % _suffix].append(p)
                    if "recall" in metrics:
                        k_results[_k]["recall@k%s" % _suffix].append(r)
                    if "f1" in metrics:
                        k_results[_k]["f1@k%s" % _suffix].append(2 * p * r / (p + r) if p + r else 0)
                    k_results[_k]["len@k%s" % _suffix].append(len(k_label))
                    k_results[_k]["support@k%s" % _suffix].append(1)

    ret = POrderedDict()
    for key, value in results.items():
        if value:
            if key == "support":
                ret[key] = np.sum(value).item()
            else:
                ret[key] = np.mean(value).item()

    if metrics & {"ndcg", "precision", "recall", "f1"}:
        for k, key_value in k_results.items():
            ret[k] = OrderedDict()
            for key, value in key_value.items():
                if value:
                    if key in {"support@k", "support@k(B)"}:
                        ret[k][key] = np.sum(value).item()
                    else:
                        ret[k][key] = np.mean(value).item()
    return ret