General Toolkit¶

analyser¶

longling.ML.toolkit.analyser.get_max(src: ((<class 'str'>, <class 'pathlib.PurePath'>), <class 'list'>), *keys, with_keys: (<class 'str'>, None) = None, with_all=False, merge=True)[源代码]¶

实际案例

>>> src = [
... {"Epoch": 0, "macro avg": {"f1": 0.7}, "loss": 0.04, "accuracy": 0.7},
... {"Epoch": 1, "macro avg": {"f1": 0.88}, "loss": 0.03, "accuracy": 0.8},
... {"Epoch": 2, "macro avg": {"f1": 0.7}, "loss": 0.02, "accuracy": 0.66}
... ]
>>> result, _ = get_max(src, "accuracy", merge=False)
>>> result
{'accuracy': 0.8}
>>> _, result_appendix = get_max(src, "accuracy", with_all=True, merge=False)
>>> result_appendix
{'accuracy': {'Epoch': 1, 'macro avg': {'f1': 0.88}, 'loss': 0.03, 'accuracy': 0.8}}
>>> result, result_appendix = get_max(src, "accuracy", "macro avg:f1", with_keys="Epoch", merge=False)
>>> result
{'accuracy': 0.8, 'macro avg:f1': 0.88}
>>> result_appendix
{'accuracy': {'Epoch': 1}, 'macro avg:f1': {'Epoch': 1}}
>>> get_max(src, "accuracy", "macro avg:f1", with_keys="Epoch")
{'accuracy': {'Epoch': 1, 'accuracy': 0.8}, 'macro avg:f1': {'Epoch': 1, 'macro avg:f1': 0.88}}

longling.ML.toolkit.analyser.get_min(src: ((<class 'str'>, <class 'pathlib.PurePath'>), <class 'list'>), *keys, with_keys: (<class 'str'>, None) = None, with_all=False, merge=True)[源代码]¶

>>> src = [
... {"Epoch": 0, "macro avg": {"f1": 0.7}, "loss": 0.04, "accuracy": 0.7},
... {"Epoch": 1, "macro avg": {"f1": 0.88}, "loss": 0.03, "accuracy": 0.8},
... {"Epoch": 2, "macro avg": {"f1": 0.7}, "loss": 0.02, "accuracy": 0.66}
... ]
>>> get_min(src, "loss")
{'loss': 0.02}

longling.ML.toolkit.analyser.key_parser(key)[源代码]¶

实际案例

>>> key_parser("macro avg:f1")
['macro avg', 'f1']
>>> key_parser("accuracy")
'accuracy'
>>> key_parser("iteration:accuracy")
['iteration', 'accuracy']

dataset¶

class longling.ML.toolkit.dataset.ID2Feature(feature_df: pandas.core.frame.DataFrame, id_field=None, set_index=False)[源代码]¶

实际案例

>>> import pandas as pd
>>> df = pd.DataFrame({"id": [0, 1, 2, 3, 4], "numeric": [1, 2, 3, 4, 5], "text": ["a", "b", "c", "d", "e"]})
>>> i2f = ID2Feature(df, id_field="id", set_index=True)
>>> i2f[2]
numeric    3
text       c
Name: 2, dtype: object
>>> i2f[[2, 3]]["numeric"]
id
2    3
3    4
Name: numeric, dtype: int64
>>> i2f(2)
[3, 'c']
>>> i2f([2, 3])
[[3, 'c'], [4, 'd']]

class longling.ML.toolkit.dataset.ItemSpecificSampler(triplet_df: pandas.core.frame.DataFrame, query_field='item_id', pos_field='pos', neg_field='neg', set_index=False, item_id_range=None, user_id_range=None, random_state=10)[源代码]¶

实际案例

>>> import pandas as pd
>>> user_num = 3
>>> item_num = 4
>>> rating_matrix = pd.DataFrame({
...     "user_id": [0, 1, 1, 1, 2],
...     "item_id": [1, 3, 0, 2, 1]
... })
>>> triplet_df = ItemSpecificSampler.rating2triplet(rating_matrix)
>>> triplet_df   # doctest: +NORMALIZE_WHITESPACE
            pos neg
item_id
0           [1]  []
1        [0, 2]  []
2           [1]  []
3           [1]  []
>>> triplet_df.index
Int64Index([0, 1, 2, 3], dtype='int64', name='item_id')
>>> sampler = ItemSpecificSampler(triplet_df)
>>> sampler(1)
(0, [0])
>>> sampler = ItemSpecificSampler(triplet_df, user_id_range=user_num)
>>> sampler(0, implicit=True)
(1, [2])
>>> sampler(0, 5, implicit=True)
(2, [2, 0, 0, 0, 0])
>>> sampler(0, 5, implicit=True, pad_value=-1)
(2, [0, 2, -1, -1, -1])
>>> sampler([0, 1, 2], 5, implicit=True, pad_value=-1)
[(2, [0, 2, -1, -1, -1]), (1, [1, -1, -1, -1, -1]), (2, [0, 2, -1, -1, -1])]
>>> rating_matrix = pd.DataFrame({
...     "user_id": [0, 1, 1, 1, 2],
...     "item_id": [1, 3, 0, 2, 1],
...     "score": [1, 0, 1, 1, 0]
... })
>>> triplet_df = ItemSpecificSampler.rating2triplet(rating_matrix=rating_matrix, value_field="score")
>>> triplet_df   # doctest: +NORMALIZE_WHITESPACE
         pos  neg
item_id
0        [1]   []
1        [0]  [2]
2        [1]   []
3         []  [1]
>>> sampler = ItemSpecificSampler(triplet_df)
>>> sampler([0, 1, 2], 5, pad_value=-1)
[(0, [-1, -1, -1, -1, -1]), (1, [2, -1, -1, -1, -1]), (0, [-1, -1, -1, -1, -1])]
>>> sampler([0, 1, 2], 5, neg=False, pad_value=-1)
[(1, [1, -1, -1, -1, -1]), (1, [0, -1, -1, -1, -1]), (1, [1, -1, -1, -1, -1])]
>>> sampler(rating_matrix["item_id"], 2, neg=rating_matrix["score"],
...     excluded_key=rating_matrix["user_id"], pad_value=-1)
[(1, [2, -1]), (0, [-1, -1]), (0, [-1, -1]), (0, [-1, -1]), (1, [0, -1])]
>>> sampler(rating_matrix["item_id"], 2, neg=rating_matrix["score"],
...     excluded_key=rating_matrix["user_id"], pad_value=-1, return_column=True)
((1, 0, 0, 0, 1), ([2, -1], [-1, -1], [-1, -1], [-1, -1], [0, -1]))
>>> sampler(rating_matrix["item_id"], 2, neg=rating_matrix["score"],
...     excluded_key=rating_matrix["user_id"], pad_value=-1, return_column=True, split_sample_to_column=True)
((1, 0, 0, 0, 1), [(2, -1, -1, -1, 0), (-1, -1, -1, -1, -1)])

class longling.ML.toolkit.dataset.TripletPairSampler(triplet_df: pandas.core.frame.DataFrame, query_field, pos_field='pos', neg_field='neg', set_index=False, query_range: (<class 'int'>, <class 'tuple'>, <class 'list'>) = None, key_range: (<class 'int'>, <class 'tuple'>, <class 'list'>) = None, random_state=10)[源代码]¶

实际案例

>>> # implicit feedback
>>> import pandas as pd
>>> triplet_df = pd.DataFrame({
...     "query": [0, 1, 2],
...     "pos": [[1], [3, 0, 2], [1]],
...     "neg": [[], [], []]
... })
>>> sampler = TripletPairSampler(triplet_df, "query", set_index=True)
>>> rating_matrix = pd.DataFrame({
...     "query": [0, 1, 1, 1, 2],
...     "key": [1, 3, 0, 2, 1]
... })
>>> triplet_df = TripletPairSampler.rating2triplet(rating_matrix, query_field="query", key_field="key")
>>> triplet_df   # doctest: +NORMALIZE_WHITESPACE
             pos neg
query
0            [1]  []
1      [3, 0, 2]  []
2            [1]  []
>>> sampler = TripletPairSampler(triplet_df, "query")
>>> sampler(0)
(0, [0])
>>> sampler(0, 3)
(0, [0, 0, 0])
>>> sampler(0, 3, padding=False)
(0, [])
>>> sampler = TripletPairSampler(triplet_df, "query", query_range=3, key_range=4)
>>> sampler(0)
(0, [0])
>>> sampler(0, 3)
(0, [0, 0, 0])
>>> sampler(0, 3, padding=False)
(0, [])
>>> sampler(0, 5, padding=False, implicit=True)
(3, [2, 3, 0])
>>> sampler(0, 5, padding=False, implicit=True, excluded_key=[3])
(2, [0, 2])
>>> sampler(0, 5, padding=True, implicit=True, excluded_key=[3])
(2, [2, 0, 0, 0, 0])
>>> sampler(0, 5, implicit=True, pad_value=-1)
(3, [2, 3, 0, -1, -1])
>>> sampler(0, 5, implicit=True, fast_implicit=True, pad_value=-1)
(3, [0, 2, 3, -1, -1])
>>> sampler(0, 5, implicit=True, fast_implicit=True, with_n_implicit=3, pad_value=-1)
(3, [0, 2, 3, -1, -1, -1, -1, -1])
>>> sampler(0, 5, implicit=True, fast_implicit=True, with_n_implicit=3, pad_value=-1, padding_implicit=True)
(3, [0, 2, 3, -1, -1, -1, -1, -1])
>>> rating_matrix = pd.DataFrame({
...     "query": [0, 1, 1, 1, 2],
...     "key": [1, 3, 0, 2, 1],
...     "score": [1, 0, 1, 1, 0]
... })
>>> triplet_df = TripletPairSampler.rating2triplet(
...     rating_matrix,
...     "query", "key",
...     value_field="score"
... )
>>> triplet_df   # doctest: +NORMALIZE_WHITESPACE
            pos  neg
query
0           [1]   []
1        [0, 2]  [3]
2            []  [1]
>>> sampler = TripletPairSampler(triplet_df, "query", query_range=3, key_range=4)
>>> sampler([0, 1, 2], 5, implicit=True, pad_value=-1)
[(3, [2, 3, 0, -1, -1]), (1, [1, -1, -1, -1, -1]), (3, [3, 0, 2, -1, -1])]
>>> sampler([0, 1, 2], 5, pad_value=-1)
[(0, [-1, -1, -1, -1, -1]), (1, [3, -1, -1, -1, -1]), (1, [1, -1, -1, -1, -1])]
>>> sampler([0, 1, 2], 5, neg=False, pad_value=-1)
[(1, [1, -1, -1, -1, -1]), (2, [0, 2, -1, -1, -1]), (0, [-1, -1, -1, -1, -1])]
>>> sampler(rating_matrix["query"], 2, neg=rating_matrix["score"],
...     excluded_key=rating_matrix["key"], pad_value=-1)
[(0, [-1, -1]), (2, [2, 0]), (1, [3, -1]), (1, [3, -1]), (0, [-1, -1])]
>>> sampler(rating_matrix["query"], 2, neg=rating_matrix["score"],
...     excluded_key=rating_matrix["key"], pad_value=-1, return_column=True)
((0, 2, 1, 1, 0), ([-1, -1], [0, 2], [3, -1], [3, -1], [-1, -1]))
>>> sampler(rating_matrix["query"], 2, neg=rating_matrix["score"],
...     excluded_key=rating_matrix["key"], pad_value=-1, return_column=True, split_sample_to_column=True)
((0, 2, 1, 1, 0), [(-1, 0, 3, 3, -1), (-1, 2, -1, -1, -1)])
>>> rating_matrix = pd.DataFrame({
...     "query": [0, 1, 1, 1, 2],
...     "key": [1, 3, 0, 2, 1],
...     "score": [0.8, 0.4, 0.7, 0.5, 0.1]
... })
>>> TripletPairSampler.rating2triplet(
...     rating_matrix,
...     "query", "key",
...     value_field="score",
...     value_threshold=0.5
... )   # doctest: +NORMALIZE_WHITESPACE
          pos  neg
query
0         [1]   []
1      [0, 2]  [3]
2          []  [1]

class longling.ML.toolkit.dataset.UserSpecificPairSampler(triplet_df: pandas.core.frame.DataFrame, query_field='user_id', pos_field='pos', neg_field='neg', set_index=False, user_id_range=None, item_id_range=None, random_state=10)[源代码]¶

实际案例

>>> import pandas as pd
>>> user_num = 3
>>> item_num = 4
>>> rating_matrix = pd.DataFrame({
...     "user_id": [0, 1, 1, 1, 2],
...     "item_id": [1, 3, 0, 2, 1]
... })
>>> triplet_df = UserSpecificPairSampler.rating2triplet(rating_matrix)
>>> triplet_df   # doctest: +NORMALIZE_WHITESPACE
               pos neg
user_id
0              [1]  []
1        [3, 0, 2]  []
2              [1]  []
>>> sampler = UserSpecificPairSampler(triplet_df)
>>> sampler(1)
(0, [0])
>>> sampler = UserSpecificPairSampler(triplet_df, item_id_range=item_num)
>>> sampler(0, implicit=True)
(1, [3])
>>> sampler(0, 5, implicit=True)
(3, [3, 2, 0, 0, 0])
>>> sampler(0, 5, implicit=True, pad_value=-1)
(3, [3, 2, 0, -1, -1])
>>> sampler([0, 1, 2], 5, implicit=True, pad_value=-1)
[(3, [2, 3, 0, -1, -1]), (1, [1, -1, -1, -1, -1]), (3, [2, 0, 3, -1, -1])]
>>> rating_matrix = pd.DataFrame({
...     "user_id": [0, 1, 1, 1, 2],
...     "item_id": [1, 3, 0, 2, 1],
...     "score": [1, 0, 1, 1, 0]
... })
>>> triplet_df = UserSpecificPairSampler.rating2triplet(rating_matrix=rating_matrix, value_field="score")
>>> triplet_df   # doctest: +NORMALIZE_WHITESPACE
            pos  neg
user_id
0           [1]   []
1        [0, 2]  [3]
2            []  [1]
>>> sampler = UserSpecificPairSampler(triplet_df)
>>> sampler([0, 1, 2], 5, pad_value=-1)
[(0, [-1, -1, -1, -1, -1]), (1, [3, -1, -1, -1, -1]), (1, [1, -1, -1, -1, -1])]
>>> sampler([0, 1, 2], 5, neg=False, pad_value=-1)
[(1, [1, -1, -1, -1, -1]), (2, [0, 2, -1, -1, -1]), (0, [-1, -1, -1, -1, -1])]
>>> sampler(rating_matrix["user_id"], 2, neg=rating_matrix["score"],
...     excluded_key=rating_matrix["item_id"], pad_value=-1)
[(0, [-1, -1]), (2, [2, 0]), (1, [3, -1]), (1, [3, -1]), (0, [-1, -1])]
>>> sampler(rating_matrix["user_id"], 2, neg=rating_matrix["score"],
...     excluded_key=rating_matrix["item_id"], pad_value=-1, return_column=True)
((0, 2, 1, 1, 0), ([-1, -1], [0, 2], [3, -1], [3, -1], [-1, -1]))
>>> sampler(rating_matrix["user_id"], 2, neg=rating_matrix["score"],
...     excluded_key=rating_matrix["item_id"], pad_value=-1, return_column=True, split_sample_to_column=True)
((0, 2, 1, 1, 0), [(-1, 2, 3, 3, -1), (-1, 0, -1, -1, -1)])

longling.ML.toolkit.dataset.train_test(*files, train_size: (<class 'float'>, <class 'int'>) = 0.8, test_size: (<class 'float'>, <class 'int'>, None) = None, ratio=None, random_state=None, shuffle=True, target_names=None, suffix: list = None, prefix='', logger=<Logger dataset (INFO)>, **kwargs)[源代码]¶

参数:

files --
train_size (float, int, or None, (default=0.8)) -- Represent the proportion of the dataset to include in the train split.
test_size (float, int, or None) -- Represent the proportion of the dataset to include in the test split.
random_state (int, RandomState instance or None, optional (default=None)) -- If int, random_state is the seed used by the random number generator; If RandomState instance, random_state is the random number generator; If None, the random number generator is the RandomState instance used by np.random.
shuffle (boolean, optional (default=True)) -- Whether or not to shuffle the data before splitting. If shuffle=False then stratify must be None
target_names (list of PATH_TYPE) --
suffix (list) --
kwargs --

longling.ML.toolkit.dataset.train_valid_test(*files, train_size: (<class 'float'>, <class 'int'>) = 0.8, valid_size: (<class 'float'>, <class 'int'>) = 0.1, test_size: (<class 'float'>, <class 'int'>, None) = None, ratio=None, random_state=None, shuffle=True, target_names=None, suffix: list = None, logger=<Logger dataset (INFO)>, prefix='', **kwargs)[源代码]¶

参数:

files --
train_size (float, int, or None, (default=0.8)) -- Represent the proportion of the dataset to include in the train split.
valid_size (float, int, or None, (default=0.1)) -- Represent the proportion of the dataset to include in the valid split.
test_size (float, int, or None) -- Represent the proportion of the dataset to include in the test split.
random_state (int, RandomState instance or None, optional (default=None)) -- If int, random_state is the seed used by the random number generator; If RandomState instance, random_state is the random number generator; If None, the random number generator is the RandomState instance used by np.random.
shuffle (boolean, optional (default=True)) -- Whether or not to shuffle the data before splitting. If shuffle=False then stratify must be None
target_names --
suffix (list) --
kwargs --

formatter¶

class longling.ML.toolkit.formatter.EpisodeEvalFMT(logger=<RootLogger root (WARNING)>, dump_file: (((<class 'str'>, <class 'pathlib.PurePath'>), (<class '_io.TextIOWrapper'>, <class 'typing.TextIO'>, <class 'typing.BinaryIO'>, <class 'codecs.StreamReaderWriter'>, <class 'fileinput.FileInput'>)), None) = False, col: (<class 'int'>, None) = None, **kwargs)[源代码]¶

实际案例

>>> import numpy as np
>>> from longling.ML.metrics import classification_report
>>> y_true = np.array([0, 0, 1, 1, 2, 1])
>>> y_pred = np.array([2, 1, 0, 1, 1, 0])
>>> y_score = np.array([
...     [0.15, 0.4, 0.45],
...     [0.1, 0.9, 0.0],
...     [0.33333, 0.333333, 0.333333],
...     [0.15, 0.4, 0.45],
...     [0.1, 0.9, 0.0],
...     [0.33333, 0.333333, 0.333333]
... ])
>>> print(EpisodeEvalFMT.format(
...     iteration=30,
...     eval_name_value=classification_report(y_true, y_pred, y_score)
... ))    # doctest: +NORMALIZE_WHITESPACE
Episode [30]
           precision    recall        f1  support
0           0.000000  0.000000  0.000000        2
1           0.333333  0.333333  0.333333        3
2           0.000000  0.000000  0.000000        1
macro_avg   0.111111  0.111111  0.111111        6
accuracy: 0.166667  macro_auc: 0.194444

class longling.ML.toolkit.formatter.EpochEvalFMT(logger=<RootLogger root (WARNING)>, dump_file: (((<class 'str'>, <class 'pathlib.PurePath'>), (<class '_io.TextIOWrapper'>, <class 'typing.TextIO'>, <class 'typing.BinaryIO'>, <class 'codecs.StreamReaderWriter'>, <class 'fileinput.FileInput'>)), None) = False, col: (<class 'int'>, None) = None, **kwargs)[源代码]¶

实际案例

>>> import numpy as np
>>> from longling.ML.metrics import classification_report
>>> y_true = np.array([0, 0, 1, 1, 2, 1])
>>> y_pred = np.array([2, 1, 0, 1, 1, 0])
>>> y_score = np.array([
...     [0.15, 0.4, 0.45],
...     [0.1, 0.9, 0.0],
...     [0.33333, 0.333333, 0.333333],
...     [0.15, 0.4, 0.45],
...     [0.1, 0.9, 0.0],
...     [0.33333, 0.333333, 0.333333]
... ])
>>> print(EpochEvalFMT.format(
...     iteration=30,
...     eval_name_value=classification_report(y_true, y_pred, y_score)
... ))    # doctest: +NORMALIZE_WHITESPACE
Epoch [30]
           precision    recall        f1  support
0           0.000000  0.000000  0.000000        2
1           0.333333  0.333333  0.333333        3
2           0.000000  0.000000  0.000000        1
macro_avg   0.111111  0.111111  0.111111        6
accuracy: 0.166667  macro_auc: 0.194444

class longling.ML.toolkit.formatter.EvalFMT(logger=<RootLogger root (WARNING)>, dump_file: (((<class 'str'>, <class 'pathlib.PurePath'>), (<class '_io.TextIOWrapper'>, <class 'typing.TextIO'>, <class 'typing.BinaryIO'>, <class 'codecs.StreamReaderWriter'>, <class 'fileinput.FileInput'>)), None) = False, col: (<class 'int'>, None) = None, **kwargs)[源代码]¶

评价指标格式化类。可以按一定格式快速格式化评价指标。

参数:

logger -- 默认为 root logger
dump_file -- 不为空时，将结果写入dump_file
col (int) -- 每行放置的指标数量
kwargs -- 拓展兼容性参数

实际案例

>>> import numpy as np
>>> from longling.ML.metrics import classification_report
>>> y_true = np.array([0, 0, 1, 1, 2, 1])
>>> y_pred = np.array([2, 1, 0, 1, 1, 0])
>>> y_score = np.array([
...     [0.15, 0.4, 0.45],
...     [0.1, 0.9, 0.0],
...     [0.33333, 0.333333, 0.333333],
...     [0.15, 0.4, 0.45],
...     [0.1, 0.9, 0.0],
...     [0.33333, 0.333333, 0.333333]
... ])
>>> print(EvalFMT.format(
...     iteration=30,
...     eval_name_value=classification_report(y_true, y_pred, y_score)
... ))    # doctest: +NORMALIZE_WHITESPACE
Iteration [30]
           precision    recall        f1  support
0           0.000000  0.000000  0.000000        2
1           0.333333  0.333333  0.333333        3
2           0.000000  0.000000  0.000000        1
macro_avg   0.111111  0.111111  0.111111        6
accuracy: 0.166667  macro_auc: 0.194444

longling.ML.toolkit.formatter.result_format(data: dict, col=None)[源代码]¶

参数:	data -- col --

实际案例

>>> print(result_format({"a": 1, "b": 2}))    # doctest: +NORMALIZE_WHITESPACE
a: 1        b: 2
>>> print(result_format({"a": 1, "b": {"1": 0.1, "2": 0.3}, "c": {"1": 0.4, "2": 0.0}}))
     1    2
b  0.1  0.3
c  0.4  0.0
a: 1

monitor¶

class longling.ML.toolkit.monitor.EMAValue(value_function_names: (<class 'list'>, <class 'dict'>), smoothing_constant=0.1, *args, **kwargs)[源代码]¶

Exponential moving average: smoothing to give progressively lower weights to older values.

\[losses[name] = (1 - c) \times previous\_value + c \times loss\_value\]

>>> ema = EMAValue(["l2"])
>>> ema["l2"]
nan
>>> ema("l2", 100)
>>> ema("l2", 1)
>>> ema["l2"]
90.1
>>> list(ema.values())
[90.1]
>>> list(ema.keys())
['l2']
>>> list(ema.items())
[('l2', 90.1)]
>>> ema.reset()
>>> ema["l2"]
nan
>>> ema = EMAValue(["l1", "l2"])
>>> ema["l2"], ema["l1"]
(nan, nan)
>>> ema.updates({"l1": 1, "l2": 10})
>>> ema.updates({"l1": 10, "l2": 100})
>>> ema["l1"]
1.9
>>> ema["l2"]
19.0
>>> ema = EMAValue(["l1"], smoothing_constant=0.0)
>>> ema["l1"]
nan
>>> ema.updates({"l1": 1})
>>> ema.updates({"l1": 10})
>>> ema["l1"]
1.0
>>> ema = EMAValue(["l1"], smoothing_constant=1.0)
>>> ema.updates({"l1": 1})
>>> ema.updates({"l1": 10})
>>> ema["l1"]
10.0
>>> @as_tmt_value
... def mse_loss(a):
...     return a ** 2
>>> ema = EMAValue({"mse": mse_loss})
>>> ema["mse"]
nan
>>> mse_loss(1)
1
>>> ema["mse"]
1
>>> mse_loss(10)
100
>>> ema["mse"]
10.9
>>> ema = EMAValue({"mse": mse_loss})
>>> mse_loss(1)
1
>>> ema["mse"]
1
>>> ema.monitor_off("mse")
>>> ema.func
{}
>>> mse_loss(10)
100
>>> "mse" not in ema
True
>>> ema.monitor_on("mse", mse_loss)
>>> mse_loss(10)
100
>>> ema["mse"]
100

get_update_value(name: str, value: (<class 'float'>, <class 'int'>))[源代码]¶

参数:

name (str) -- The name of the value to be updated
value (int or float) -- New value to include in EMA.

class longling.ML.toolkit.monitor.MovingLoss(value_function_names: (<class 'list'>, <class 'dict'>), smoothing_constant=0.1, *args, **kwargs)[源代码]¶

实际案例

>>> lm = MovingLoss(["l2"])
>>> lm.losses
{'l2': nan}
>>> lm("l2", 100)
>>> lm("l2", 1)
>>> lm["l2"]
90.1

longling.ML.toolkit.monitor.as_tmt_loss(loss_obj, loss2value=<function <lambda>>)[源代码]¶

参数:	loss_obj -- loss2value --

实际案例

>>> @as_tmt_loss
... def mse(v):
...     return v ** 2
>>> mse(2)
4

longling.ML.toolkit.monitor.as_tmt_value(value_obj, transform=<function <lambda>>)[源代码]¶

参数:	value_obj -- transform --

实际案例

>>> def loss_f(a):
...     return a
>>> loss_f(10)
10
>>> tmt_loss_f = as_tmt_value(loss_f)
>>> tmt_loss_f(10)
10
>>> @as_tmt_value
... def loss_f2(a):
...     return a
>>> loss_f2(10)
10

hyper_search¶

longling.ML.toolkit.hyper_search.prepare_hyper_search(cfg_kwargs: dict, reporthook=None, final_reporthook=None, primary_key=None, max_key=True, reporter_cls=None, with_keys: (<class 'list'>, <class 'str'>, None) = None, final_keys: (<class 'list'>, <class 'str'>, None) = None, dump=False, disable=False)[源代码]¶

Updated in v1.3.18

从 nni package 中获取超参，更新配置文件参数。当 nni 不可用或不是 nni 搜索模式时，参数将不会改变。

cfg_kwargs, reporthook, final_reporthook, dump = prepare_hyper_search(
    cfg_kwargs, reporthook, final_reporthook, primary_key="macro_avg:f1"
)

_cfg = Configuration(**cfg_kwargs)
model = Model(_cfg)
...

for epoch in range(_cfg.begin_epoch, _cfg.end_epoch):
    for batch_data in dataset:
        train_model(batch_data)

    data = evaluate_model()
    reporthook(data)

final_reporthook()

参数:

cfg_kwargs (dict) -- 待传入cfg的参数
reporthook --
final_reporthook --
primary_key -- 评估模型用的主键, nni.report_intermediate_result 和 nni.report_final_result 中 metric 的 default
max_key (bool) -- 主键是越大越好
reporter_cls --
with_keys (list or str) -- 其它要存储的 metric，final report时默认为 primary_key 最优时指标
final_keys (list or str) -- with_keys 中使用最后一个 report result 而不是 primary_key 最优时指标
dump (bool) -- 为 True 时，会修改配置文件中 workspace 参数为 workspace/nni.get_experiment_id()/nni.get_trial_id() 使得 nni 的中间结果会被存储下来。
disable --

返回:

cfg_kwargs (dict) -- 插入了nni超参后的配置文件参数
reporthook (function) -- 每个iteration结束后的回调函数，用来报告中间结果。默认 nni.report_intermediate_result。
final_reporthook -- 所有iteration结束后的回调函数，用来报告最终结果。默认 nni.report_final_result
dump (bool) -- 和传入参数保持一致

实际案例

class CFG(Configuration):
    hyper_params = {"hidden_num": 100}
    learning_rate = 0.001
    workspace = ""

cfg_kwargs, reporthook, final_reporthook, dump = prepare_hyper_search(
    {"learning_rate": 0.1}, CFG, primary_key="macro_avg:f1", with_keys="accuracy"
)
# cfg_kwargs: {'learning_rate': 0.1}

When nni starts (e.g., using nnictl create --config _config.yml), suppose _config.yml contains:

searchSpacePath: _search_space.json

and in _search_space.json

{
    "hidden_num": {"_type": "choice", "_value": [500, 600, 700, 835, 900]}
}

one of the returned cfg_kwargs is {'hyper_params': {'hidden_num': 500}, 'learning_rate': 0.1}