General Toolkit¶
analyser¶
-
longling.ML.toolkit.analyser.
get_max
(src: ((<class 'str'>, <class 'pathlib.PurePath'>), <class 'list'>), *keys, with_keys: (<class 'str'>, None) = None, with_all=False, merge=True)[源代码]¶ 实际案例
>>> src = [ ... {"Epoch": 0, "macro avg": {"f1": 0.7}, "loss": 0.04, "accuracy": 0.7}, ... {"Epoch": 1, "macro avg": {"f1": 0.88}, "loss": 0.03, "accuracy": 0.8}, ... {"Epoch": 1, "macro avg": {"f1": 0.7}, "loss": 0.02, "accuracy": 0.66} ... ] >>> result, _ = get_max(src, "accuracy", merge=False) >>> result {'accuracy': 0.8} >>> _, result_appendix = get_max(src, "accuracy", with_all=True, merge=False) >>> result_appendix {'accuracy': {'Epoch': 1, 'macro avg': {'f1': 0.88}, 'loss': 0.03, 'accuracy': 0.8}} >>> result, result_appendix = get_max(src, "accuracy", "macro avg:f1", with_keys="Epoch", merge=False) >>> result {'accuracy': 0.8, 'macro avg:f1': 0.88} >>> result_appendix {'accuracy': {'Epoch': 1}, 'macro avg:f1': {'Epoch': 1}} >>> get_max(src, "accuracy", "macro avg:f1", with_keys="Epoch") {'accuracy': {'Epoch': 1, 'accuracy': 0.8}, 'macro avg:f1': {'Epoch': 1, 'macro avg:f1': 0.88}}
-
longling.ML.toolkit.analyser.
get_min
(src: ((<class 'str'>, <class 'pathlib.PurePath'>), <class 'list'>), *keys, with_keys: (<class 'str'>, None) = None, with_all=False, merge=True)[源代码]¶ >>> src = [ ... {"Epoch": 0, "macro avg": {"f1": 0.7}, "loss": 0.04, "accuracy": 0.7}, ... {"Epoch": 1, "macro avg": {"f1": 0.88}, "loss": 0.03, "accuracy": 0.8}, ... {"Epoch": 1, "macro avg": {"f1": 0.7}, "loss": 0.02, "accuracy": 0.66} ... ] >>> get_min(src, "loss") {'loss': 0.02}
dataset¶
-
class
longling.ML.toolkit.dataset.
ID2Feature
(feature_df: pandas.core.frame.DataFrame, id_field=None, set_index=False)[源代码]¶ 实际案例
>>> import pandas as pd >>> df = pd.DataFrame({"id": [0, 1, 2, 3, 4], "numeric": [1, 2, 3, 4, 5], "text": ["a", "b", "c", "d", "e"]}) >>> i2f = ID2Feature(df, id_field="id", set_index=True) >>> i2f[2] numeric 3 text c Name: 2, dtype: object >>> i2f[[2, 3]]["numeric"] id 2 3 3 4 Name: numeric, dtype: int64 >>> i2f(2) [3, 'c'] >>> i2f([2, 3]) [[3, 'c'], [4, 'd']]
-
class
longling.ML.toolkit.dataset.
ItemSpecificSampler
(triplet_df: pandas.core.frame.DataFrame, query_field='item_id', pos_field='pos', neg_field='neg', set_index=False, item_id_range=None, user_id_range=None, random_state=10)[源代码]¶ 实际案例
>>> import pandas as pd >>> user_num = 3 >>> item_num = 4 >>> rating_matrix = pd.DataFrame({ ... "user_id": [0, 1, 1, 1, 2], ... "item_id": [1, 3, 0, 2, 1] ... }) >>> triplet_df = ItemSpecificSampler.rating2triplet(rating_matrix) >>> triplet_df # doctest: +NORMALIZE_WHITESPACE pos neg item_id 0 [1] [] 1 [0, 2] [] 2 [1] [] 3 [1] [] >>> triplet_df.index Int64Index([0, 1, 2, 3], dtype='int64', name='item_id') >>> sampler = ItemSpecificSampler(triplet_df) >>> sampler(1) (0, [0]) >>> sampler = ItemSpecificSampler(triplet_df, user_id_range=user_num) >>> sampler(0, implicit=True) (1, [2]) >>> sampler(0, 5, implicit=True) (2, [2, 0, 0, 0, 0]) >>> sampler(0, 5, implicit=True, pad_value=-1) (2, [0, 2, -1, -1, -1]) >>> sampler([0, 1, 2], 5, implicit=True, pad_value=-1) [(2, [0, 2, -1, -1, -1]), (1, [1, -1, -1, -1, -1]), (2, [0, 2, -1, -1, -1])] >>> rating_matrix = pd.DataFrame({ ... "user_id": [0, 1, 1, 1, 2], ... "item_id": [1, 3, 0, 2, 1], ... "score": [1, 0, 1, 1, 0] ... }) >>> triplet_df = ItemSpecificSampler.rating2triplet(rating_matrix=rating_matrix, value_field="score") >>> triplet_df # doctest: +NORMALIZE_WHITESPACE pos neg item_id 0 [1] [] 1 [0] [2] 2 [1] [] 3 [] [1] >>> sampler = UserSpecificPairSampler(triplet_df) >>> sampler([0, 1, 2], 5, pad_value=-1) [(0, [-1, -1, -1, -1, -1]), (1, [2, -1, -1, -1, -1]), (0, [-1, -1, -1, -1, -1])] >>> sampler([0, 1, 2], 5, neg=False, pad_value=-1) [(1, [1, -1, -1, -1, -1]), (1, [0, -1, -1, -1, -1]), (1, [1, -1, -1, -1, -1])] >>> sampler(rating_matrix["item_id"], 2, neg=rating_matrix["score"], ... excluded_key=rating_matrix["user_id"], pad_value=-1) [(1, [2, -1]), (0, [-1, -1]), (0, [-1, -1]), (0, [-1, -1]), (1, [0, -1])] >>> sampler(rating_matrix["item_id"], 2, neg=rating_matrix["score"], ... excluded_key=rating_matrix["user_id"], pad_value=-1, return_column=True) ((1, 0, 0, 0, 1), ([2, -1], [-1, -1], [-1, -1], [-1, -1], [0, -1])) >>> sampler(rating_matrix["item_id"], 2, neg=rating_matrix["score"], ... 
excluded_key=rating_matrix["user_id"], pad_value=-1, return_column=True, split_sample_to_column=True) ((1, 0, 0, 0, 1), [(2, -1, -1, -1, 0), (-1, -1, -1, -1, -1)])
-
class
longling.ML.toolkit.dataset.
TripletPairSampler
(triplet_df: pandas.core.frame.DataFrame, query_field, pos_field='pos', neg_field='neg', set_index=False, query_range: (<class 'int'>, <class 'tuple'>, <class 'list'>) = None, key_range: (<class 'int'>, <class 'tuple'>, <class 'list'>) = None, random_state=10)[源代码]¶ 实际案例
>>> # implicit feedback >>> import pandas as pd >>> triplet_df = pd.DataFrame({ ... "query": [0, 1, 2], ... "pos": [[1], [3, 0, 2], [1]], ... "neg": [[], [], []] ... }) >>> sampler = TripletPairSampler(triplet_df, "query", set_index=True) >>> rating_matrix = pd.DataFrame({ ... "query": [0, 1, 1, 1, 2], ... "key": [1, 3, 0, 2, 1] ... }) >>> triplet_df = TripletPairSampler.rating2triplet(rating_matrix, query_field="query", key_field="key") >>> triplet_df # doctest: +NORMALIZE_WHITESPACE pos neg query 0 [1] [] 1 [3, 0, 2] [] 2 [1] [] >>> sampler = TripletPairSampler(triplet_df, "query") >>> sampler(0) (0, [0]) >>> sampler(0, 3) (0, [0, 0, 0]) >>> sampler(0, 3, padding=False) (0, []) >>> sampler = TripletPairSampler(triplet_df, "query", query_range=3, key_range=4) >>> sampler(0) (0, [0]) >>> sampler(0, 3) (0, [0, 0, 0]) >>> sampler(0, 3, padding=False) (0, []) >>> sampler(0, 5, padding=False, implicit=True) (3, [2, 3, 0]) >>> sampler(0, 5, padding=False, implicit=True, excluded_key=[3]) (2, [0, 2]) >>> sampler(0, 5, padding=True, implicit=True, excluded_key=[3]) (2, [2, 0, 0, 0, 0]) >>> sampler(0, 5, implicit=True, pad_value=-1) (3, [2, 3, 0, -1, -1]) >>> sampler(0, 5, implicit=True, fast_implicit=True, pad_value=-1) (3, [0, 2, 3, -1, -1]) >>> sampler(0, 5, implicit=True, fast_implicit=True, with_n_implicit=3, pad_value=-1) (3, [0, 2, 3, -1, -1, -1, -1, -1]) >>> sampler(0, 5, implicit=True, fast_implicit=True, with_n_implicit=3, pad_value=-1, padding_implicit=True) (3, [0, 2, 3, -1, -1, -1, -1, -1]) >>> rating_matrix = pd.DataFrame({ ... "query": [0, 1, 1, 1, 2], ... "key": [1, 3, 0, 2, 1], ... "score": [1, 0, 1, 1, 0] ... }) >>> triplet_df = TripletPairSampler.rating2triplet( ... rating_matrix, ... "query", "key", ... value_field="score" ... 
) >>> triplet_df # doctest: +NORMALIZE_WHITESPACE pos neg query 0 [1] [] 1 [0, 2] [3] 2 [] [1] >>> sampler = TripletPairSampler(triplet_df, "query", query_range=3, key_range=4) >>> sampler([0, 1, 2], 5, implicit=True, pad_value=-1) [(3, [2, 3, 0, -1, -1]), (1, [1, -1, -1, -1, -1]), (3, [3, 0, 2, -1, -1])] >>> sampler([0, 1, 2], 5, pad_value=-1) [(0, [-1, -1, -1, -1, -1]), (1, [3, -1, -1, -1, -1]), (1, [1, -1, -1, -1, -1])] >>> sampler([0, 1, 2], 5, neg=False, pad_value=-1) [(1, [1, -1, -1, -1, -1]), (2, [0, 2, -1, -1, -1]), (0, [-1, -1, -1, -1, -1])] >>> sampler(rating_matrix["query"], 2, neg=rating_matrix["score"], ... excluded_key=rating_matrix["key"], pad_value=-1) [(0, [-1, -1]), (2, [2, 0]), (1, [3, -1]), (1, [3, -1]), (0, [-1, -1])] >>> sampler(rating_matrix["query"], 2, neg=rating_matrix["score"], ... excluded_key=rating_matrix["key"], pad_value=-1, return_column=True) ((0, 2, 1, 1, 0), ([-1, -1], [0, 2], [3, -1], [3, -1], [-1, -1])) >>> sampler(rating_matrix["query"], 2, neg=rating_matrix["score"], ... excluded_key=rating_matrix["key"], pad_value=-1, return_column=True, split_sample_to_column=True) ((0, 2, 1, 1, 0), [(-1, 0, 3, 3, -1), (-1, 2, -1, -1, -1)]) >>> rating_matrix = pd.DataFrame({ ... "query": [0, 1, 1, 1, 2], ... "key": [1, 3, 0, 2, 1], ... "score": [0.8, 0.4, 0.7, 0.5, 0.1] ... }) >>> TripletPairSampler.rating2triplet( ... rating_matrix, ... "query", "key", ... value_field="score", ... value_threshold=0.5 ... ) # doctest: +NORMALIZE_WHITESPACE pos neg query 0 [1] [] 1 [0, 2] [3] 2 [] [1]
-
class
longling.ML.toolkit.dataset.
UserSpecificPairSampler
(triplet_df: pandas.core.frame.DataFrame, query_field='user_id', pos_field='pos', neg_field='neg', set_index=False, user_id_range=None, item_id_range=None, random_state=10)[源代码]¶ 实际案例
>>> import pandas as pd >>> user_num = 3 >>> item_num = 4 >>> rating_matrix = pd.DataFrame({ ... "user_id": [0, 1, 1, 1, 2], ... "item_id": [1, 3, 0, 2, 1] ... }) >>> triplet_df = UserSpecificPairSampler.rating2triplet(rating_matrix) >>> triplet_df # doctest: +NORMALIZE_WHITESPACE pos neg user_id 0 [1] [] 1 [3, 0, 2] [] 2 [1] [] >>> sampler = UserSpecificPairSampler(triplet_df) >>> sampler(1) (0, [0]) >>> sampler = UserSpecificPairSampler(triplet_df, item_id_range=item_num) >>> sampler(0, implicit=True) (1, [3]) >>> sampler(0, 5, implicit=True) (3, [3, 2, 0, 0, 0]) >>> sampler(0, 5, implicit=True, pad_value=-1) (3, [3, 2, 0, -1, -1]) >>> sampler([0, 1, 2], 5, implicit=True, pad_value=-1) [(3, [2, 3, 0, -1, -1]), (1, [1, -1, -1, -1, -1]), (3, [2, 0, 3, -1, -1])] >>> rating_matrix = pd.DataFrame({ ... "user_id": [0, 1, 1, 1, 2], ... "item_id": [1, 3, 0, 2, 1], ... "score": [1, 0, 1, 1, 0] ... }) >>> triplet_df = UserSpecificPairSampler.rating2triplet(rating_matrix=rating_matrix, value_field="score") >>> triplet_df # doctest: +NORMALIZE_WHITESPACE pos neg user_id 0 [1] [] 1 [0, 2] [3] 2 [] [1] >>> sampler = UserSpecificPairSampler(triplet_df) >>> sampler([0, 1, 2], 5, pad_value=-1) [(0, [-1, -1, -1, -1, -1]), (1, [3, -1, -1, -1, -1]), (1, [1, -1, -1, -1, -1])] >>> sampler([0, 1, 2], 5, neg=False, pad_value=-1) [(1, [1, -1, -1, -1, -1]), (2, [0, 2, -1, -1, -1]), (0, [-1, -1, -1, -1, -1])] >>> sampler(rating_matrix["user_id"], 2, neg=rating_matrix["score"], ... excluded_key=rating_matrix["item_id"], pad_value=-1) [(0, [-1, -1]), (2, [2, 0]), (1, [3, -1]), (1, [3, -1]), (0, [-1, -1])] >>> sampler(rating_matrix["user_id"], 2, neg=rating_matrix["score"], ... excluded_key=rating_matrix["item_id"], pad_value=-1, return_column=True) ((0, 2, 1, 1, 0), ([-1, -1], [0, 2], [3, -1], [3, -1], [-1, -1])) >>> sampler(rating_matrix["user_id"], 2, neg=rating_matrix["score"], ... 
excluded_key=rating_matrix["item_id"], pad_value=-1, return_column=True, split_sample_to_column=True) ((0, 2, 1, 1, 0), [(-1, 2, 3, 3, -1), (-1, 0, -1, -1, -1)])
-
longling.ML.toolkit.dataset.
train_test
(*files, train_size: (<class 'float'>, <class 'int'>) = 0.8, test_size: (<class 'float'>, <class 'int'>, None) = None, ratio=None, random_state=None, shuffle=True, target_names=None, suffix: list = None, prefix='', logger=<Logger dataset (INFO)>, **kwargs)[源代码]¶ 参数: - files --
- train_size (float, int, or None, (default=0.8)) -- Represent the proportion of the dataset to include in the train split.
- test_size (float, int, or None) -- Represent the proportion of the dataset to include in the test split.
- random_state (int, RandomState instance or None, optional (default=None)) -- If int, random_state is the seed used by the random number generator; If RandomState instance, random_state is the random number generator; If None, the random number generator is the RandomState instance used by np.random.
- shuffle (boolean, optional (default=True)) -- Whether or not to shuffle the data before splitting. If shuffle=False then stratify must be None
- target_names (list of PATH_TYPE) --
- suffix (list) --
- kwargs --
-
longling.ML.toolkit.dataset.
train_valid_test
(*files, train_size: (<class 'float'>, <class 'int'>) = 0.8, valid_size: (<class 'float'>, <class 'int'>) = 0.1, test_size: (<class 'float'>, <class 'int'>, None) = None, ratio=None, random_state=None, shuffle=True, target_names=None, suffix: list = None, logger=<Logger dataset (INFO)>, prefix='', **kwargs)[源代码]¶ 参数: - files --
- train_size (float, int, or None, (default=0.8)) -- Represent the proportion of the dataset to include in the train split.
- valid_size (float, int, or None, (default=0.1)) -- Represent the proportion of the dataset to include in the valid split.
- test_size (float, int, or None) -- Represent the proportion of the dataset to include in the test split.
- random_state (int, RandomState instance or None, optional (default=None)) -- If int, random_state is the seed used by the random number generator; If RandomState instance, random_state is the random number generator; If None, the random number generator is the RandomState instance used by np.random.
- shuffle (boolean, optional (default=True)) -- Whether or not to shuffle the data before splitting. If shuffle=False then stratify must be None
- target_names --
- suffix (list) --
- kwargs --
formatter¶
-
class
longling.ML.toolkit.formatter.
EpisodeEvalFMT
(logger=<RootLogger root (WARNING)>, dump_file: (((<class 'str'>, <class 'pathlib.PurePath'>), (<class '_io.TextIOWrapper'>, <class 'typing.TextIO'>, <class 'typing.BinaryIO'>, <class 'codecs.StreamReaderWriter'>, <class 'fileinput.FileInput'>)), None) = False, col: (<class 'int'>, None) = None, **kwargs)[源代码]¶ 实际案例
>>> import numpy as np >>> from longling.ML.metrics import classification_report >>> y_true = np.array([0, 0, 1, 1, 2, 1]) >>> y_pred = np.array([2, 1, 0, 1, 1, 0]) >>> y_score = np.array([ ... [0.15, 0.4, 0.45], ... [0.1, 0.9, 0.0], ... [0.33333, 0.333333, 0.333333], ... [0.15, 0.4, 0.45], ... [0.1, 0.9, 0.0], ... [0.33333, 0.333333, 0.333333] ... ]) >>> print(EpisodeEvalFMT.format( ... iteration=30, ... eval_name_value=classification_report(y_true, y_pred, y_score) ... )) # doctest: +NORMALIZE_WHITESPACE Episode [30] precision recall f1 support 0 0.000000 0.000000 0.000000 2 1 0.333333 0.333333 0.333333 3 2 0.000000 0.000000 0.000000 1 macro_avg 0.111111 0.111111 0.111111 6 accuracy: 0.166667 macro_auc: 0.194444
-
class
longling.ML.toolkit.formatter.
EpochEvalFMT
(logger=<RootLogger root (WARNING)>, dump_file: (((<class 'str'>, <class 'pathlib.PurePath'>), (<class '_io.TextIOWrapper'>, <class 'typing.TextIO'>, <class 'typing.BinaryIO'>, <class 'codecs.StreamReaderWriter'>, <class 'fileinput.FileInput'>)), None) = False, col: (<class 'int'>, None) = None, **kwargs)[源代码]¶ 实际案例
>>> import numpy as np >>> from longling.ML.metrics import classification_report >>> y_true = np.array([0, 0, 1, 1, 2, 1]) >>> y_pred = np.array([2, 1, 0, 1, 1, 0]) >>> y_score = np.array([ ... [0.15, 0.4, 0.45], ... [0.1, 0.9, 0.0], ... [0.33333, 0.333333, 0.333333], ... [0.15, 0.4, 0.45], ... [0.1, 0.9, 0.0], ... [0.33333, 0.333333, 0.333333] ... ]) >>> print(EpochEvalFMT.format( ... iteration=30, ... eval_name_value=classification_report(y_true, y_pred, y_score) ... )) # doctest: +NORMALIZE_WHITESPACE Epoch [30] precision recall f1 support 0 0.000000 0.000000 0.000000 2 1 0.333333 0.333333 0.333333 3 2 0.000000 0.000000 0.000000 1 macro_avg 0.111111 0.111111 0.111111 6 accuracy: 0.166667 macro_auc: 0.194444
-
class
longling.ML.toolkit.formatter.
EvalFMT
(logger=<RootLogger root (WARNING)>, dump_file: (((<class 'str'>, <class 'pathlib.PurePath'>), (<class '_io.TextIOWrapper'>, <class 'typing.TextIO'>, <class 'typing.BinaryIO'>, <class 'codecs.StreamReaderWriter'>, <class 'fileinput.FileInput'>)), None) = False, col: (<class 'int'>, None) = None, **kwargs)[源代码]¶ 评价指标格式化类。可以按一定格式快速格式化评价指标。
参数: - logger -- 默认为 root logger
- dump_file -- 不为空时,将结果写入dump_file
- col (int) -- 每行放置的指标数量
- kwargs -- 拓展兼容性参数
实际案例
>>> import numpy as np >>> from longling.ML.metrics import classification_report >>> y_true = np.array([0, 0, 1, 1, 2, 1]) >>> y_pred = np.array([2, 1, 0, 1, 1, 0]) >>> y_score = np.array([ ... [0.15, 0.4, 0.45], ... [0.1, 0.9, 0.0], ... [0.33333, 0.333333, 0.333333], ... [0.15, 0.4, 0.45], ... [0.1, 0.9, 0.0], ... [0.33333, 0.333333, 0.333333] ... ]) >>> print(EvalFMT.format( ... iteration=30, ... eval_name_value=classification_report(y_true, y_pred, y_score) ... )) # doctest: +NORMALIZE_WHITESPACE Iteration [30] precision recall f1 support 0 0.000000 0.000000 0.000000 2 1 0.333333 0.333333 0.333333 3 2 0.000000 0.000000 0.000000 1 macro_avg 0.111111 0.111111 0.111111 6 accuracy: 0.166667 macro_auc: 0.194444
monitor¶
-
class
longling.ML.toolkit.monitor.
EMAValue
(value_function_names: (<class 'list'>, <class 'dict'>), smoothing_constant=0.1, *args, **kwargs)[源代码]¶ Exponential moving average: smoothing to give progressively lower weights to older values.
\[losses[name] = (1 - c) \times previous\_value + c \times loss\_value\] where c is the smoothing_constant.
>>> ema = EMAValue(["l2"]) >>> ema["l2"] nan >>> ema("l2", 100) >>> ema("l2", 1) >>> ema["l2"] 90.1 >>> list(ema.values()) [90.1] >>> list(ema.keys()) ['l2'] >>> list(ema.items()) [('l2', 90.1)] >>> ema.reset() >>> ema["l2"] nan >>> ema = EMAValue(["l1", "l2"]) >>> ema["l2"], ema["l1"] (nan, nan) >>> ema.updates({"l1": 1, "l2": 10}) >>> ema.updates({"l1": 10, "l2": 100}) >>> ema["l1"] 1.9 >>> ema["l2"] 19.0 >>> ema = EMAValue(["l1"], smoothing_constant=0.0) >>> ema["l1"] nan >>> ema.updates({"l1": 1}) >>> ema.updates({"l1": 10}) >>> ema["l1"] 1.0 >>> ema = EMAValue(["l1"], smoothing_constant=1.0) >>> ema.updates({"l1": 1}) >>> ema.updates({"l1": 10}) >>> ema["l1"] 10.0 >>> @as_tmt_value ... def mse_loss(a): ... return a ** 2 >>> ema = EMAValue({"mse": mse_loss}) >>> ema["mse"] nan >>> mse_loss(1) 1 >>> ema["mse"] 1 >>> mse_loss(10) 100 >>> ema["mse"] 10.9 >>> ema = EMAValue({"mse": mse_loss}) >>> mse_loss(1) 1 >>> ema["mse"] 1 >>> ema.monitor_off("mse") >>> ema.func {} >>> mse_loss(10) 100 >>> "mse" not in ema True >>> ema.monitor_on("mse", mse_loss) >>> mse_loss(10) 100 >>> ema["mse"] 100
-
class
longling.ML.toolkit.monitor.
MovingLoss
(value_function_names: (<class 'list'>, <class 'dict'>), smoothing_constant=0.1, *args, **kwargs)[源代码]¶ 实际案例
>>> lm = MovingLoss(["l2"]) >>> lm.losses {'l2': nan} >>> lm("l2", 100) >>> lm("l2", 1) >>> lm["l2"] 90.1
-
longling.ML.toolkit.monitor.
as_tmt_loss
(loss_obj, loss2value=<function <lambda>>)[源代码]¶ 参数: - loss_obj --
- loss2value --
实际案例
>>> @as_tmt_loss ... def mse(v): ... return v ** 2 >>> mse(2) 4
-
longling.ML.toolkit.monitor.
as_tmt_value
(value_obj, transform=<function <lambda>>)[源代码]¶ 参数: - value_obj --
- transform --
实际案例
>>> def loss_f(a): ... return a >>> loss_f(10) 10 >>> tmt_loss_f = as_tmt_value(loss_f) >>> tmt_loss_f(10) 10 >>> @as_tmt_value ... def loss_f2(a): ... return a >>> loss_f2(10) 10
hyper_search¶
-
longling.ML.toolkit.hyper_search.
prepare_hyper_search
(cfg_kwargs: dict, reporthook=None, final_reporthook=None, primary_key=None, max_key=True, reporter_cls=None, with_keys: (<class 'list'>, <class 'str'>, None) = None, final_keys: (<class 'list'>, <class 'str'>, None) = None, dump=False, disable=False)[源代码]¶ Updated in v1.3.18
从 nni package 中获取超参,更新配置文件参数。当 nni 不可用或不是 nni 搜索模式时,参数将不会改变。
cfg_kwargs, reporthook, final_reporthook, tag = prepare_hyper_search( cfg_kwargs, reporthook, final_reporthook, primary_key="macro_avg:f1" ) _cfg = Configuration(**cfg_kwargs) model = Model(_cfg) ... for epoch in range(_cfg.begin_epoch, _cfg.end_epoch): for batch_data in dataset: train_model(batch_data) data = evaluate_model() reporthook(data) final_reporthook()
参数: - cfg_kwargs (dict) -- 待传入cfg的参数
- reporthook --
- final_reporthook --
- primary_key -- 评估模型用的主键,nni.report_intermediate_result 和 nni.report_final_result 中 metric 的 default
- max_key (bool) -- 主键是越大越好
- reporter_cls --
- with_keys (list or str) -- 其它要存储的 metric,final report时默认为 primary_key 最优时指标
- final_keys (list or str) -- with_keys 中使用最后一个 report result 而不是 primary_key 最优时指标
- dump (bool) -- 为 True 时,会修改 配置文件 中 workspace 参数为 workspace/nni.get_experiment_id()/nni.get_trial_id() 使得 nni 的中间结果会被存储下来。
- disable --
返回: - cfg_kwargs (dict) -- 插入了nni超参后的配置文件参数
- reporthook (function) -- 每个iteration结束后的回调函数,用来报告中间结果。默认 nni.report_intermediate_result。
- final_reporthook -- 所有iteration结束后的回调函数,用来报告最终结果。默认 nni.report_final_result
- dump (bool) -- 和传入参数保持一致
实际案例
class CFG(Configuration): hyper_params = {"hidden_num": 100} learning_rate = 0.001 workspace = "" cfg_kwargs, reporthook, final_reporthook, dump = prepare_hyper_search( {"learning_rate": 0.1}, CFG, primary_key="macro_avg:f1", with_keys="accuracy" ) # cfg_kwargs: {'learning_rate': 0.1}
When nni starts (e.g., using nni create --config _config.yml), suppose in _config.yml:
and in _search_space.json:
{ "hidden_num": {"_type": "choice", "_value": [500, 600, 700, 835, 900]}, }
one of the return cfg_kwargs is
{'hyper_params': {'hidden_num': 500}, 'learning_rate': 0.1}