Optuna by 바죠

Optuna is an optimization framework created for machine learning. It is easy to use and runs on a wide range of hardware (CPU/GPU).
Used alongside Bayesian optimization, it can be one of the best tools available for hyperparameter tuning.



Key feature: pruning to discard low-quality trials early (a minimal sketch follows the quickstart below).


import optuna

def objective(trial):
    x = trial.suggest_float('x', -10, 10)  # suggest_uniform is deprecated; suggest_float is the current API
    return (x - 2) ** 2

study = optuna.create_study()
study.optimize(objective, n_trials=100)

print(study.best_params)  # E.g. {'x': 2.002108042}
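
The pruning mentioned above needs no framework integration. Below is a minimal sketch of manual pruning with trial.report() and trial.should_prune(); the toy training loop, the learning-rate range, and the step count are illustrative assumptions, not part of the original post.

import optuna

def objective(trial):
    lr = trial.suggest_float("lr", 1e-4, 1e-1, log=True)
    score = 0.0
    for step in range(100):                  # hypothetical training loop
        score += lr * (1.0 - score)          # stand-in for one epoch of training
        trial.report(score, step)            # report the intermediate value
        if trial.should_prune():             # let the pruner stop bad trials early
            raise optuna.TrialPruned()
    return score

study = optuna.create_study(direction="maximize",
                            pruner=optuna.pruners.MedianPruner(n_warmup_steps=10))
study.optimize(objective, n_trials=50)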




from xgboost import XGBRegressor
import lightgbm as lgbm
from catboost import CatBoostRegressor
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.ensemble import StackingRegressor

final_estimator = GradientBoostingRegressor(n_estimators=200, random_state=42)

# XGB_params, lgb_params, and catboost_params are hyperparameter dicts,
# e.g. the best_params found by Optuna studies (see the sketch below).
estimators = [('xgb', XGBRegressor(tree_method='gpu_hist', **XGB_params)),
              ('lgb', lgbm.LGBMRegressor(device_type='gpu', **lgb_params)),
              ('cat', CatBoostRegressor(verbose=0, task_type='GPU', **catboost_params))]

reg = StackingRegressor(estimators=estimators, final_estimator=final_estimator)
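
XGB_params, lgb_params, and catboost_params are not defined in the snippet above. A minimal sketch of how XGB_params might be produced with an Optuna study, assuming an illustrative dataset (load_diabetes) and a small, hypothetical search space:

import optuna
from sklearn.datasets import load_diabetes          # illustrative dataset, not from the post
from sklearn.model_selection import cross_val_score
from xgboost import XGBRegressor

X, y = load_diabetes(return_X_y=True)

def xgb_objective(trial):
    params = {
        "n_estimators": trial.suggest_int("n_estimators", 100, 1000),
        "max_depth": trial.suggest_int("max_depth", 3, 10),
        "learning_rate": trial.suggest_float("learning_rate", 1e-3, 0.3, log=True),
        "subsample": trial.suggest_float("subsample", 0.5, 1.0),
    }
    model = XGBRegressor(**params, random_state=42)
    # Maximize negative MSE (i.e. minimize MSE) over 5-fold cross-validation.
    return cross_val_score(model, X, y, cv=5, scoring="neg_mean_squared_error").mean()

xgb_study = optuna.create_study(direction="maximize")
xgb_study.optimize(xgb_objective, n_trials=50)
XGB_params = xgb_study.best_params

The lgb_params and catboost_params dicts would come from analogous studies over LGBMRegressor and CatBoostRegressor.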



---------------------------------------------------------------------------------------------------------------------
import lightgbm as lgb
import numpy as np
import sklearn.datasets
import sklearn.metrics
from sklearn.model_selection import train_test_split

import optuna
from optuna.visualization import plot_contour
from optuna.visualization import plot_edf
from optuna.visualization import plot_intermediate_values
from optuna.visualization import plot_optimization_history
from optuna.visualization import plot_parallel_coordinate
from optuna.visualization import plot_param_importances
from optuna.visualization import plot_slice

SEED = 42
np.random.seed(SEED)
def objective(trial):
    data, target = sklearn.datasets.load_breast_cancer(return_X_y=True)
    train_x, valid_x, train_y, valid_y = train_test_split(data, target, test_size=0.25, random_state=SEED)
    dtrain = lgb.Dataset(train_x, label=train_y)
    dvalid = lgb.Dataset(valid_x, label=valid_y)

    param = {
        "objective": "binary",
        "metric": "auc",
        "verbosity": -1,
        "boosting_type": "gbdt",
        "bagging_fraction": trial.suggest_float("bagging_fraction", 0.4, 1.0),
        "bagging_freq": trial.suggest_int("bagging_freq", 1, 7),
        "min_child_samples": trial.suggest_int("min_child_samples", 5, 100),
    }

    # Add a callback for pruning.
    pruning_callback = optuna.integration.LightGBMPruningCallback(trial, "auc")
    gbm = lgb.train(
        param, dtrain, valid_sets=[dvalid], callbacks=[pruning_callback]
    )  # verbose_eval was removed in LightGBM 4.x; verbosity=-1 in param keeps training quiet

    preds = gbm.predict(valid_x)
    pred_labels = np.rint(preds)
    accuracy = sklearn.metrics.accuracy_score(valid_y, pred_labels)
    return accuracy

study = optuna.create_study(
    direction="maximize",
    sampler=optuna.samplers.TPESampler(seed=SEED),
    pruner=optuna.pruners.MedianPruner(n_warmup_steps=10),
)
study.optimize(objective, n_trials=100, timeout=600)

plot_optimization_history(study)
plot_intermediate_values(study)
plot_parallel_coordinate(study)
plot_parallel_coordinate(study, params=["bagging_freq", "bagging_fraction"])
plot_contour(study)
plot_contour(study, params=["bagging_freq", "bagging_fraction"])
plot_slice(study)
plot_slice(study, params=["bagging_freq", "bagging_fraction"])
plot_param_importances(study)
optuna.visualization.plot_param_importances(
    study, target=lambda t: t.duration.total_seconds(), target_name="duration"
)
plot_edf(study)
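
In a notebook these plot_* calls render inline; in a plain script each call returns a Plotly figure that must be shown explicitly, e.g.:

fig = plot_optimization_history(study)
fig.show()  # opens the interactive figure in a browser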





