Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
47 changes: 47 additions & 0 deletions flaml/automl/automl.py
Original file line number Diff line number Diff line change
Expand Up @@ -3780,6 +3780,53 @@ def _search(self):
else:
logger.info("not retraining because the time budget is too small.")

def visualize(self):
    """Show an interactive dashboard widget for a trained AutoML instance.

    Must be called after ``fit(...)`` from within a Jupyter environment.
    Renders a dropdown that toggles between:

    * ``"Feature Importance"`` — horizontal bar chart of
      ``self.feature_importances_`` per ``self.feature_names_in_``.
    * ``"Learning Curve"`` — best-validation-accuracy-vs-wall-clock-time
      curve reconstructed from the log file recorded in
      ``self._settings["log_file_name"]``. Logs a warning instead if no
      log file was configured.
    """
    # Imported lazily: matplotlib/ipywidgets are only needed for this
    # optional visualization feature, not for core AutoML usage.
    import matplotlib.pyplot as plt
    import numpy as np
    from ipywidgets import interact

    # @interact both displays the dropdown widget and invokes the
    # function with the selected value; no explicit call is needed.
    @interact
    def helper(option=["Feature Importance", "Learning Curve"]):
        if option == "Feature Importance":
            plt.barh(self.feature_names_in_, self.feature_importances_)
            plt.xlabel("Feature Importance")
            plt.ylabel("Feature")
            plt.show()
        elif option == "Learning Curve":
            from flaml.data import get_output_from_log

            log_file_name = self._settings.get("log_file_name")
            if not log_file_name:
                logger.warning(
                    "Log file for this instance not found. Unable to visualize learning curve."
                )
            else:
                # Use the budget this instance was configured with so the
                # whole run is shown; fall back to "no cutoff" when the
                # user did not set one (the original hard-coded 240s,
                # silently truncating longer runs).
                time_budget = self._settings.get("time_budget") or float("inf")
                (
                    time_history,
                    best_valid_loss_history,
                    valid_loss_history,
                    config_history,
                    metric_history,
                ) = get_output_from_log(
                    filename=log_file_name, time_budget=time_budget
                )

                plt.title("Learning Curve")
                plt.xlabel("Wall Clock Time (s)")
                plt.ylabel("Validation Accuracy")
                # Losses are stored as (1 - accuracy); invert for display.
                plt.scatter(time_history, 1 - np.array(valid_loss_history))
                plt.step(
                    time_history,
                    1 - np.array(best_valid_loss_history),
                    where="post",
                )
                plt.show()

def __del__(self):
if (
hasattr(self, "_trained_estimator")
Expand Down
136 changes: 136 additions & 0 deletions notebook/visualize.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,136 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"<jemalloc>: MADV_DONTNEED does not work (memset will be used instead)\n",
"<jemalloc>: (This is the expected behaviour if you are running under QEMU)\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Enabling notebook extension jupyter-js-widgets/extension...\n",
" - Validating: \u001b[32mOK\u001b[0m\n",
"load dataset from ./openml_ds1169.pkl\n",
"Dataset name: airlines\n",
"X_train.shape: (404537, 7), y_train.shape: (404537,);\n",
"X_test.shape: (134846, 7), y_test.shape: (134846,)\n",
"[flaml.automl.automl: 01-27 05:48:20] {2715} INFO - task = classification\n",
"[flaml.automl.automl: 01-27 05:48:20] {2717} INFO - Data split method: stratified\n",
"[flaml.automl.automl: 01-27 05:48:20] {2720} INFO - Evaluation method: holdout\n",
"[flaml.automl.automl: 01-27 05:48:22] {2847} INFO - Minimizing error metric: 1-accuracy\n",
"[flaml.automl.automl: 01-27 05:48:22] {2993} INFO - List of ML learners in AutoML Run: ['lgbm', 'rf', 'catboost', 'xgboost', 'extra_tree', 'xgb_limitdepth', 'lrl1']\n",
"[flaml.automl.automl: 01-27 05:48:22] {3322} INFO - iteration 0, current learner lgbm\n",
"[flaml.automl.automl: 01-27 05:48:23] {3461} INFO - Estimated sufficient time budget=275476s. Estimated necessary time budget=6763s.\n",
"[flaml.automl.automl: 01-27 05:48:23] {3513} INFO - at 4.4s,\testimator lgbm's best error=0.4459,\tbest estimator lgbm's best error=0.4459\n",
"[flaml.automl.automl: 01-27 05:48:23] {3322} INFO - iteration 1, current learner lgbm\n",
"[flaml.automl.automl: 01-27 05:48:23] {3513} INFO - at 4.9s,\testimator lgbm's best error=0.4459,\tbest estimator lgbm's best error=0.4459\n",
"[flaml.automl.automl: 01-27 05:48:23] {3322} INFO - iteration 2, current learner lgbm\n",
"[flaml.automl.automl: 01-27 05:48:24] {3513} INFO - at 5.4s,\testimator lgbm's best error=0.3777,\tbest estimator lgbm's best error=0.3777\n",
"[flaml.automl.automl: 01-27 05:48:24] {3322} INFO - iteration 3, current learner lgbm\n",
"[flaml.automl.automl: 01-27 05:48:24] {3513} INFO - at 5.8s,\testimator lgbm's best error=0.3777,\tbest estimator lgbm's best error=0.3777\n",
"[flaml.automl.automl: 01-27 05:48:24] {3322} INFO - iteration 4, current learner lgbm\n",
"[flaml.automl.automl: 01-27 05:48:24] {3513} INFO - at 6.0s,\testimator lgbm's best error=0.3777,\tbest estimator lgbm's best error=0.3777\n",
"[flaml.automl.automl: 01-27 05:48:24] {3322} INFO - iteration 5, current learner lgbm\n",
"[flaml.automl.automl: 01-27 05:48:25] {3513} INFO - at 6.4s,\testimator lgbm's best error=0.3777,\tbest estimator lgbm's best error=0.3777\n",
"[flaml.automl.automl: 01-27 05:48:25] {3322} INFO - iteration 6, current learner lgbm\n",
"[flaml.automl.automl: 01-27 05:48:25] {3513} INFO - at 6.9s,\testimator lgbm's best error=0.3777,\tbest estimator lgbm's best error=0.3777\n",
"[flaml.automl.automl: 01-27 05:48:25] {3322} INFO - iteration 7, current learner xgboost\n",
"[flaml.automl.automl: 01-27 05:48:26] {3513} INFO - at 8.0s,\testimator xgboost's best error=0.3787,\tbest estimator lgbm's best error=0.3777\n",
"[flaml.automl.automl: 01-27 05:48:26] {3322} INFO - iteration 8, current learner lgbm\n",
"[flaml.automl.automl: 01-27 05:48:27] {3513} INFO - at 8.6s,\testimator lgbm's best error=0.3777,\tbest estimator lgbm's best error=0.3777\n",
"[flaml.automl.automl: 01-27 05:48:27] {3322} INFO - iteration 9, current learner xgboost\n",
"[flaml.automl.automl: 01-27 05:48:28] {3513} INFO - at 9.6s,\testimator xgboost's best error=0.3787,\tbest estimator lgbm's best error=0.3777\n",
"[flaml.automl.automl: 01-27 05:48:28] {3322} INFO - iteration 10, current learner extra_tree\n",
"[flaml.automl.automl: 01-27 05:48:28] {3513} INFO - at 9.9s,\testimator extra_tree's best error=0.4459,\tbest estimator lgbm's best error=0.3777\n",
"[flaml.automl.automl: 01-27 05:48:28] {3322} INFO - iteration 11, current learner rf\n",
"[flaml.automl.automl: 01-27 05:48:28] {3513} INFO - at 10.2s,\testimator rf's best error=0.4421,\tbest estimator lgbm's best error=0.3777\n",
"[flaml.automl.automl: 01-27 05:48:31] {3773} INFO - retrain lgbm for 2.3s\n",
"[flaml.automl.automl: 01-27 05:48:31] {3778} INFO - retrained model: LGBMClassifier(learning_rate=0.26770501231052046, max_bin=127,\n",
" min_child_samples=12, n_estimators=4, num_leaves=4,\n",
" reg_alpha=0.001348364934537134, reg_lambda=1.4442580148221913,\n",
" verbose=-1)\n",
"[flaml.automl.automl: 01-27 05:48:31] {3023} INFO - fit succeeded\n",
"[flaml.automl.automl: 01-27 05:48:31] {3025} INFO - Time taken to find the best model: 5.436654806137085\n"
]
}
],
"source": [
"from flaml.automl.automl import AutoML\n",
"from flaml.data import load_openml_dataset\n",
"!jupyter nbextension enable --py widgetsnbextension\n",
"\n",
"X_train, X_test, y_train, y_test = load_openml_dataset(dataset_id=1169, data_dir=\"./\")\n",
"settings = {\n",
" \"time_budget\": 10, # total running time in seconds\n",
" \"metric\": \"accuracy\", # can be: 'r2', 'rmse', 'mae', 'mse', 'accuracy', 'roc_auc', 'roc_auc_ovr',\n",
" # 'roc_auc_ovo', 'log_loss', 'mape', 'f1', 'ap', 'ndcg', 'micro_f1', 'macro_f1'\n",
" \"task\": \"classification\", # task type\n",
" \"log_file_name\": \"airlines_experiment.log\", # flaml log file\n",
" \"seed\": 7654321, # random seed\n",
"}\n",
"automl = AutoML(**settings)\n",
"automl.fit(X_train=X_train, y_train=y_train)"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "a652c19b24404f6cb9d3f2938b033ec8",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"interactive(children=(Dropdown(description='option', options=('Feature Importance', 'Learning Curve'), value='…"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"automl.visualize()"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.16"
},
"orig_nbformat": 4,
"vscode": {
"interpreter": {
"hash": "aee8b7b246df8f9039afb4144a1f6fd8d2ca17a180786b69acc140d282b71a49"
}
}
},
"nbformat": 4,
"nbformat_minor": 2
}