Source code for longling.ML.metrics.ranking

# coding: utf-8
# 2021/6/21 @ tongshiwei
from tqdm import tqdm
from longling import as_list
from copy import deepcopy
from .utils import POrderedDict

__all__ = ["ranking_report"]


def ranking_auc(ranked_label):
    """
    Examples
    --------
    >>> ranking_auc([1, 1, 0, 0, 0])
    1.0
    >>> ranking_auc([0, 1, 0, 1, 0])
    0.5
    >>> ranking_auc([1, 0, 1, 0, 0])
    0.8333333333333334
    >>> ranking_auc([0, 0, 0, 1, 1])
    0.0
    """
    pos_num = sum(ranked_label)
    neg_num = len(ranked_label) - pos_num
    if pos_num * neg_num == 0:  # pragma: no cover
        return 1
    return sum(
        [len(ranked_label[i + 1:]) - sum(ranked_label[i + 1:]) for i, score in enumerate(ranked_label) if score == 1]
    ) / (pos_num * neg_num)
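

# Illustrative sketch (not part of the public API): the quantity computed by
# ``ranking_auc`` above can equivalently be read as the fraction of
# (positive, negative) index pairs in which the positive item appears first.
# The helper name ``_pairwise_ranking_auc`` is hypothetical and is kept only
# as a reference implementation of that pair-counting view.
def _pairwise_ranking_auc(ranked_label):
    """
    >>> _pairwise_ranking_auc([1, 0, 1, 0, 0])
    0.8333333333333334
    """
    positives = [i for i, label in enumerate(ranked_label) if label == 1]
    negatives = [i for i, label in enumerate(ranked_label) if label == 0]
    if not positives or not negatives:  # degenerate case, mirror ranking_auc
        return 1
    # count pairs where the positive item is ranked above the negative one
    ordered_pairs = sum(1 for p in positives for n in negatives if p < n)
    return ordered_pairs / (len(positives) * len(negatives))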


def ranking_report(y_true, y_pred, k: (int, list) = None, continuous=False, coerce="ignore",
                   pad_pred=-100, metrics=None, bottom=False, verbose=True) -> POrderedDict:
    r"""
    Parameters
    ----------
    y_true : list of list
        Ground-truth labels for each query (0/1, or real-valued relevance when ``continuous`` is True).
    y_pred : list of list
        Predicted scores for each query, aligned with ``y_true``.
    k : int or list of int, optional
        Cutoff(s) for the top-k metrics; defaults to ``[1, 3, 5, 10]``
        (``[3, 5, 10]`` when ``continuous`` is True).
    continuous : bool
        Whether the labels are continuous relevance scores rather than 0/1.
    coerce : str
        How to handle queries with fewer than ``k`` items:
        one of ``"ignore"``, ``"abandon"``, ``"raise"`` or ``"padding"``.
    pad_pred : float
        Prediction score assigned to padded items when ``coerce="padding"``.
    metrics : list of str, optional
        Metrics to compute; by default, all metrics applicable to the label type.
    bottom : bool
        Whether to additionally report bottom-ranking metrics, suffixed with ``(B)``.
    verbose : bool
        Whether to show the progress bar.

    Returns
    -------
    report : POrderedDict
        Overall metrics plus one sub-dictionary of top-k metrics per cutoff.

    Examples
    --------
    >>> y_true = [[1, 0, 0], [0, 0, 1]]
    >>> y_pred = [[0.75, 0.5, 1], [1, 0.2, 0.1]]
    >>> ranking_report(y_true, y_pred)  # doctest: +NORMALIZE_WHITESPACE
          ndcg@k  precision@k  recall@k  f1@k  len@k  support@k
    1   1.000000     0.000000       0.0   0.0    1.0          2
    3   0.565465     0.333333       1.0   0.5    3.0          2
    5   0.565465     0.333333       1.0   0.5    3.0          2
    10  0.565465     0.333333       1.0   0.5    3.0          2
    auc: 0.250000  map: 0.416667  mrr: 0.416667  coverage_error: 2.500000
    ranking_loss: 0.750000  len: 3.000000  support: 2
    >>> ranking_report(y_true, y_pred, k=[1, 3, 5])  # doctest: +NORMALIZE_WHITESPACE
         ndcg@k  precision@k  recall@k  f1@k  len@k  support@k
    1  1.000000     0.000000       0.0   0.0    1.0          2
    3  0.565465     0.333333       1.0   0.5    3.0          2
    5  0.565465     0.333333       1.0   0.5    3.0          2
    auc: 0.250000  map: 0.416667  mrr: 0.416667  coverage_error: 2.500000
    ranking_loss: 0.750000  len: 3.000000  support: 2
    >>> ranking_report(y_true, y_pred, bottom=True)  # doctest: +NORMALIZE_WHITESPACE
          ndcg@k  precision@k  recall@k  f1@k  len@k  support@k  ndcg@k(B)  \
    1   1.000000     0.000000       0.0   0.0    1.0          2   1.000000
    3   0.565465     0.333333       1.0   0.5    3.0          2   0.806574
    5   0.565465     0.333333       1.0   0.5    3.0          2   0.806574
    10  0.565465     0.333333       1.0   0.5    3.0          2   0.806574
    <BLANKLINE>
        precision@k(B)  recall@k(B)   f1@k(B)  len@k(B)  support@k(B)
    1         0.500000         0.25  0.333333       1.0             2
    3         0.666667         1.00  0.800000       3.0             2
    5         0.666667         1.00  0.800000       3.0             2
    10        0.666667         1.00  0.800000       3.0             2
    auc: 0.250000  map: 0.416667  mrr: 0.416667  coverage_error: 2.500000
    ranking_loss: 0.750000  len: 3.000000  support: 2  map(B): 0.708333  mrr(B): 0.750000
    >>> ranking_report(y_true, y_pred, bottom=True, metrics=["auc"])  # doctest: +NORMALIZE_WHITESPACE
    auc: 0.250000  len: 3.000000  support: 2
    >>> y_true = [[0.9, 0.7, 0.1], [0, 0.5, 1]]
    >>> y_pred = [[0.75, 0.5, 1], [1, 0.2, 0.1]]
    >>> ranking_report(y_true, y_pred, continuous=True)  # doctest: +NORMALIZE_WHITESPACE
          ndcg@k  len@k  support@k
    3   0.675647    3.0          2
    5   0.675647    3.0          2
    10  0.675647    3.0          2
    mrr: 0.750000  len: 3.000000  support: 2
    >>> y_true = [[1, 0], [0, 0, 1]]
    >>> y_pred = [[0.75, 0.5], [1, 0.2, 0.1]]
    >>> ranking_report(y_true, y_pred)  # doctest: +NORMALIZE_WHITESPACE
        ndcg@k  precision@k  recall@k      f1@k  len@k  support@k
    1     1.00     0.500000       0.5  0.500000    1.0          2
    3     0.75     0.416667       1.0  0.583333    2.5          2
    5     0.75     0.416667       1.0  0.583333    2.5          2
    10    0.75     0.416667       1.0  0.583333    2.5          2
    auc: 0.500000  map: 0.666667  mrr: 0.666667  coverage_error: 2.000000
    ranking_loss: 0.500000  len: 2.500000  support: 2
    >>> ranking_report(y_true, y_pred, coerce="abandon")  # doctest: +NORMALIZE_WHITESPACE
       ndcg@k  precision@k  recall@k  f1@k  len@k  support@k
    1     1.0     0.500000       0.5   0.5    1.0          2
    3     0.5     0.333333       1.0   0.5    3.0          1
    auc: 0.500000  map: 0.666667  mrr: 0.666667  coverage_error: 2.000000
    ranking_loss: 0.500000  len: 2.500000  support: 2
    >>> ranking_report(y_true, y_pred, coerce="padding")  # doctest: +NORMALIZE_WHITESPACE
        ndcg@k  precision@k  recall@k      f1@k  len@k  support@k
    1     1.00     0.500000       0.5  0.500000    1.0          2
    3     0.75     0.416667       1.0  0.583333    2.5          2
    5     0.75     0.416667       1.0  0.583333    2.5          2
    10    0.75     0.416667       1.0  0.583333    2.5          2
    auc: 0.500000  map: 0.666667  mrr: 0.666667  coverage_error: 2.000000
    ranking_loss: 0.500000  len: 2.500000  support: 2
    >>> ranking_report(y_true, y_pred, bottom=True)  # doctest: +NORMALIZE_WHITESPACE
        ndcg@k  precision@k  recall@k      f1@k  len@k  support@k  ndcg@k(B)  \
    1     1.00     0.500000       0.5  0.500000    1.0          2   1.000000
    3     0.75     0.416667       1.0  0.583333    2.5          2   0.846713
    5     0.75     0.416667       1.0  0.583333    2.5          2   0.846713
    10    0.75     0.416667       1.0  0.583333    2.5          2   0.846713
    <BLANKLINE>
        precision@k(B)  recall@k(B)   f1@k(B)  len@k(B)  support@k(B)
    1         0.500000          0.5  0.500000       1.0             2
    3         0.583333          1.0  0.733333       2.5             2
    5         0.583333          1.0  0.733333       2.5             2
    10        0.583333          1.0  0.733333       2.5             2
    auc: 0.500000  map: 0.666667  mrr: 0.666667  coverage_error: 2.000000
    ranking_loss: 0.500000  len: 2.500000  support: 2  map(B): 0.791667  mrr(B): 0.750000
    >>> ranking_report(y_true, y_pred, bottom=True, coerce="abandon")  # doctest: +NORMALIZE_WHITESPACE
       ndcg@k  precision@k  recall@k  f1@k  len@k  support@k  ndcg@k(B)  \
    1     1.0     0.500000       0.5   0.5    1.0          2   1.000000
    3     0.5     0.333333       1.0   0.5    3.0          1   0.693426
    <BLANKLINE>
       precision@k(B)  recall@k(B)  f1@k(B)  len@k(B)  support@k(B)
    1        0.500000          0.5      0.5       1.0             2
    3        0.666667          1.0      0.8       3.0             1
    auc: 0.500000  map: 0.666667  mrr: 0.666667  coverage_error: 2.000000
    ranking_loss: 0.500000  len: 2.500000  support: 2  map(B): 0.791667  mrr(B): 0.750000
    >>> ranking_report(y_true, y_pred, bottom=True, coerce="padding")  # doctest: +NORMALIZE_WHITESPACE
        ndcg@k  precision@k  recall@k      f1@k  len@k  support@k  ndcg@k(B)  \
    1     1.00     0.500000       0.5  0.500000    1.0          2   1.000000
    3     0.75     0.416667       1.0  0.583333    2.5          2   0.846713
    5     0.75     0.416667       1.0  0.583333    2.5          2   0.846713
    10    0.75     0.416667       1.0  0.583333    2.5          2   0.846713
    <BLANKLINE>
        precision@k(B)  recall@k(B)   f1@k(B)  len@k(B)  support@k(B)
    1             0.50          0.5  0.500000       1.0             2
    3             0.50          1.0  0.650000       3.0             2
    5             0.30          1.0  0.452381       5.0             2
    10            0.15          1.0  0.257576      10.0             2
    auc: 0.500000  map: 0.666667  mrr: 0.666667  coverage_error: 2.000000
    ranking_loss: 0.500000  len: 2.500000  support: 2  map(B): 0.791667  mrr(B): 0.750000
    """
    import numpy as np
    from collections import OrderedDict
    from sklearn.metrics import (
        label_ranking_average_precision_score,
        ndcg_score,
        label_ranking_loss,
        coverage_error
    )

    assert coerce in {"ignore", "abandon", "raise", "padding"}

    if metrics is None:
        metrics = ["mrr", "ndcg"]
        if continuous is False:
            metrics.extend(["auc", "map", "coverage_error", "ranking_loss", "precision", "recall", "f1"])
    metrics = set(metrics)

    if k is not None:
        k = as_list(k)
    else:
        if continuous is True:
            k = [3, 5, 10]
        else:
            k = [1, 3, 5, 10]

    results = {
        "auc": [],
        "map": [],
        "mrr": [],
        "coverage_error": [],
        "ranking_loss": [],
        "len": [],
        "support": [],
    }
    if bottom:
        results.update({
            "map(B)": [],
            "mrr(B)": [],
        })
    k_results = {}
    for _k in k:
        k_results[_k] = {
            "ndcg@k": [],
            "precision@k": [],
            "recall@k": [],
            "f1@k": [],
            "len@k": [],
            "support@k": [],
        }
        if bottom:
            k_results[_k].update({
                "ndcg@k(B)": [],
                "precision@k(B)": [],
                "recall@k(B)": [],
                "f1@k(B)": [],
                "len@k(B)": [],
                "support@k(B)": [],
            })

    # "(B)" marks the bottom-ranking variants of the top-k metrics
    suffix = [""]
    if bottom:
        suffix += ["(B)"]

    for label, pred in tqdm(zip(y_true, y_pred), "ranking metrics", disable=not verbose):
        if continuous is False and "map" in metrics:
            results["map"].append(label_ranking_average_precision_score([label], [pred]))
            if bottom:
                results["map(B)"].append(label_ranking_average_precision_score(
                    [(1 - np.asarray(label)).tolist()],
                    [(-np.asarray(pred)).tolist()]
                ))
        if len(label) > 1 and continuous is False:
            if "coverage_error" in metrics:
                results["coverage_error"].append(coverage_error([label], [pred]))
            if "ranking_loss" in metrics:
                results["ranking_loss"].append(label_ranking_loss([label], [pred]))
        results["len"].append(len(label))
        results["support"].append(1)

        # rank the items of this query by predicted score, descending
        label_pred = list(sorted(zip(label, pred), key=lambda x: x[1], reverse=True))
        sorted_label = list(zip(*label_pred))[0]

        if "auc" in metrics:
            results["auc"].append(ranking_auc(sorted_label))

        if "mrr" in metrics:
            try:
                results["mrr"].append(1 / (np.asarray(sorted_label).nonzero()[0][0] + 1))
            except IndexError:  # pragma: no cover
                pass
            try:
                if bottom:
                    results["mrr(B)"].append(1 / (np.asarray(sorted_label[::-1]).nonzero()[0][0] + 1))
            except IndexError:  # pragma: no cover
                pass

        if metrics & {"ndcg", "precision", "recall", "f1"}:
            for _k in k:
                for _suffix in suffix:
                    if _suffix == "":
                        _label_pred = deepcopy(label_pred)
                        if len(_label_pred) < _k:
                            if coerce == "ignore":
                                pass
                            elif coerce == "abandon":
                                continue
                            elif coerce == "raise":
                                raise ValueError(
                                    "Not enough value: %s vs target %s" % (len(_label_pred), _k)
                                )
                            elif coerce == "padding":  # pragma: no cover
                                _label_pred += [(0, pad_pred)] * (_k - len(_label_pred))
                        k_label_pred = label_pred[:_k]
                        total_label = sum(label)
                    else:
                        # bottom metrics: invert labels and scores, then evaluate the reversed ranking
                        inv_label_pred = [(1 - _l, -p) for _l, p in label_pred][::-1]
                        if len(inv_label_pred) < _k:
                            if coerce == "ignore":
                                pass
                            elif coerce == "abandon":
                                continue
                            elif coerce == "raise":  # pragma: no cover
                                raise ValueError(
                                    "Not enough value: %s vs target %s" % (len(inv_label_pred), _k)
                                )
                            elif coerce == "padding":
                                inv_label_pred += [(0, pad_pred)] * (_k - len(inv_label_pred))
                        k_label_pred = inv_label_pred[:_k]
                        total_label = len(label) - sum(label)
                    if not k_label_pred:  # pragma: no cover
                        continue
                    k_label, k_pred = list(zip(*k_label_pred))
                    if "ndcg" in metrics:
                        if len(k_label) == 1:
                            k_results[_k]["ndcg@k%s" % _suffix].append(1)
                        else:
                            k_results[_k]["ndcg@k%s" % _suffix].append(ndcg_score([k_label], [k_pred]))
                    p = sum(k_label) / len(k_label)
                    r = sum(k_label) / total_label if total_label else 0
                    if "precision" in metrics:
                        k_results[_k]["precision@k%s" % _suffix].append(p)
                    if "recall" in metrics:
                        k_results[_k]["recall@k%s" % _suffix].append(r)
                    if "f1" in metrics:
                        k_results[_k]["f1@k%s" % _suffix].append(2 * p * r / (p + r) if p + r else 0)
                    k_results[_k]["len@k%s" % _suffix].append(len(k_label))
                    k_results[_k]["support@k%s" % _suffix].append(1)

    ret = POrderedDict()

    for key, value in results.items():
        if value:
            if key == "support":
                ret[key] = np.sum(value).item()
            else:
                ret[key] = np.mean(value).item()

    if metrics & {"ndcg", "precision", "recall", "f1"}:
        for k, key_value in k_results.items():
            ret[k] = OrderedDict()
            for key, value in key_value.items():
                if value:
                    if key in {"support@k", "support@k(B)"}:
                        ret[k][key] = np.sum(value).item()
                    else:
                        ret[k][key] = np.mean(value).item()

    return ret
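

# Usage sketch (an assumption for illustration, not part of the library): the
# returned report behaves like a nested dict, so overall values are read via
# ``report["metric"]`` and per-cutoff values via ``report[k]["metric@k"]``.
# The ``demo_*`` names below are hypothetical.
if __name__ == "__main__":
    demo_y_true = [[1, 0, 0], [0, 0, 1]]
    demo_y_pred = [[0.75, 0.5, 1], [1, 0.2, 0.1]]
    report = ranking_report(demo_y_true, demo_y_pred, k=[1, 3], verbose=False)
    print(report["auc"])        # overall AUC averaged over queries
    print(report[3]["ndcg@k"])  # mean nDCG at cutoff k=3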