From 37121506ae804baf43f173ec6f6bd722da14867c Mon Sep 17 00:00:00 2001 From: WenqinGan Date: Fri, 27 Jan 2023 06:47:35 +0000 Subject: [PATCH] finished basic visualization --- flaml/automl/automl.py | 47 ++++++++++++++ notebook/visualize.ipynb | 136 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 183 insertions(+) create mode 100644 notebook/visualize.ipynb diff --git a/flaml/automl/automl.py b/flaml/automl/automl.py index 1b6bef199b..e28c600f97 100644 --- a/flaml/automl/automl.py +++ b/flaml/automl/automl.py @@ -3779,6 +3779,53 @@ def _search(self): else: logger.info("not retraining because the time budget is too small.") + def visualize(self): + """ + Show an interactive dashboard widget for a trained AutoML instance. + Must be called after fit(...). + """ + + import matplotlib.pyplot as plt + from ipywidgets import interact + + @interact + def helper(option=["Feature Importance", "Learning Curve"]): + if option == "Feature Importance": + plt.barh(self.feature_names_in_, self.feature_importances_) + plt.xlabel("Feature Importance") + plt.ylabel("Feature") + plt.show() + + if option == "Learning Curve": + from flaml.data import get_output_from_log + + log_file_name = self._settings.get("log_file_name") + if not log_file_name: + logger.warning( + "Log file for this instance not found. Unable to visualize learning curve." 
+ ) + else: + ( + time_history, + best_valid_loss_history, + valid_loss_history, + config_history, + metric_history, + ) = get_output_from_log(filename=log_file_name, time_budget=240) + + plt.title("Learning Curve") + plt.xlabel("Wall Clock Time (s)") + plt.ylabel("Validation Accuracy") + plt.scatter(time_history, 1 - np.array(valid_loss_history)) + plt.step( + time_history, + 1 - np.array(best_valid_loss_history), + where="post", + ) + plt.show() + + helper() + def __del__(self): if ( hasattr(self, "_trained_estimator") diff --git a/notebook/visualize.ipynb b/notebook/visualize.ipynb new file mode 100644 index 0000000000..d60275cb09 --- /dev/null +++ b/notebook/visualize.ipynb @@ -0,0 +1,136 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + ": MADV_DONTNEED does not work (memset will be used instead)\n", + ": (This is the expected behaviour if you are running under QEMU)\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Enabling notebook extension jupyter-js-widgets/extension...\n", + " - Validating: \u001b[32mOK\u001b[0m\n", + "load dataset from ./openml_ds1169.pkl\n", + "Dataset name: airlines\n", + "X_train.shape: (404537, 7), y_train.shape: (404537,);\n", + "X_test.shape: (134846, 7), y_test.shape: (134846,)\n", + "[flaml.automl.automl: 01-27 05:48:20] {2715} INFO - task = classification\n", + "[flaml.automl.automl: 01-27 05:48:20] {2717} INFO - Data split method: stratified\n", + "[flaml.automl.automl: 01-27 05:48:20] {2720} INFO - Evaluation method: holdout\n", + "[flaml.automl.automl: 01-27 05:48:22] {2847} INFO - Minimizing error metric: 1-accuracy\n", + "[flaml.automl.automl: 01-27 05:48:22] {2993} INFO - List of ML learners in AutoML Run: ['lgbm', 'rf', 'catboost', 'xgboost', 'extra_tree', 'xgb_limitdepth', 'lrl1']\n", + "[flaml.automl.automl: 01-27 05:48:22] {3322} INFO - iteration 0, current learner lgbm\n", 
+ "[flaml.automl.automl: 01-27 05:48:23] {3461} INFO - Estimated sufficient time budget=275476s. Estimated necessary time budget=6763s.\n", + "[flaml.automl.automl: 01-27 05:48:23] {3513} INFO - at 4.4s,\testimator lgbm's best error=0.4459,\tbest estimator lgbm's best error=0.4459\n", + "[flaml.automl.automl: 01-27 05:48:23] {3322} INFO - iteration 1, current learner lgbm\n", + "[flaml.automl.automl: 01-27 05:48:23] {3513} INFO - at 4.9s,\testimator lgbm's best error=0.4459,\tbest estimator lgbm's best error=0.4459\n", + "[flaml.automl.automl: 01-27 05:48:23] {3322} INFO - iteration 2, current learner lgbm\n", + "[flaml.automl.automl: 01-27 05:48:24] {3513} INFO - at 5.4s,\testimator lgbm's best error=0.3777,\tbest estimator lgbm's best error=0.3777\n", + "[flaml.automl.automl: 01-27 05:48:24] {3322} INFO - iteration 3, current learner lgbm\n", + "[flaml.automl.automl: 01-27 05:48:24] {3513} INFO - at 5.8s,\testimator lgbm's best error=0.3777,\tbest estimator lgbm's best error=0.3777\n", + "[flaml.automl.automl: 01-27 05:48:24] {3322} INFO - iteration 4, current learner lgbm\n", + "[flaml.automl.automl: 01-27 05:48:24] {3513} INFO - at 6.0s,\testimator lgbm's best error=0.3777,\tbest estimator lgbm's best error=0.3777\n", + "[flaml.automl.automl: 01-27 05:48:24] {3322} INFO - iteration 5, current learner lgbm\n", + "[flaml.automl.automl: 01-27 05:48:25] {3513} INFO - at 6.4s,\testimator lgbm's best error=0.3777,\tbest estimator lgbm's best error=0.3777\n", + "[flaml.automl.automl: 01-27 05:48:25] {3322} INFO - iteration 6, current learner lgbm\n", + "[flaml.automl.automl: 01-27 05:48:25] {3513} INFO - at 6.9s,\testimator lgbm's best error=0.3777,\tbest estimator lgbm's best error=0.3777\n", + "[flaml.automl.automl: 01-27 05:48:25] {3322} INFO - iteration 7, current learner xgboost\n", + "[flaml.automl.automl: 01-27 05:48:26] {3513} INFO - at 8.0s,\testimator xgboost's best error=0.3787,\tbest estimator lgbm's best error=0.3777\n", + "[flaml.automl.automl: 01-27 
05:48:26] {3322} INFO - iteration 8, current learner lgbm\n", + "[flaml.automl.automl: 01-27 05:48:27] {3513} INFO - at 8.6s,\testimator lgbm's best error=0.3777,\tbest estimator lgbm's best error=0.3777\n", + "[flaml.automl.automl: 01-27 05:48:27] {3322} INFO - iteration 9, current learner xgboost\n", + "[flaml.automl.automl: 01-27 05:48:28] {3513} INFO - at 9.6s,\testimator xgboost's best error=0.3787,\tbest estimator lgbm's best error=0.3777\n", + "[flaml.automl.automl: 01-27 05:48:28] {3322} INFO - iteration 10, current learner extra_tree\n", + "[flaml.automl.automl: 01-27 05:48:28] {3513} INFO - at 9.9s,\testimator extra_tree's best error=0.4459,\tbest estimator lgbm's best error=0.3777\n", + "[flaml.automl.automl: 01-27 05:48:28] {3322} INFO - iteration 11, current learner rf\n", + "[flaml.automl.automl: 01-27 05:48:28] {3513} INFO - at 10.2s,\testimator rf's best error=0.4421,\tbest estimator lgbm's best error=0.3777\n", + "[flaml.automl.automl: 01-27 05:48:31] {3773} INFO - retrain lgbm for 2.3s\n", + "[flaml.automl.automl: 01-27 05:48:31] {3778} INFO - retrained model: LGBMClassifier(learning_rate=0.26770501231052046, max_bin=127,\n", + " min_child_samples=12, n_estimators=4, num_leaves=4,\n", + " reg_alpha=0.001348364934537134, reg_lambda=1.4442580148221913,\n", + " verbose=-1)\n", + "[flaml.automl.automl: 01-27 05:48:31] {3023} INFO - fit succeeded\n", + "[flaml.automl.automl: 01-27 05:48:31] {3025} INFO - Time taken to find the best model: 5.436654806137085\n" + ] + } + ], + "source": [ + "from flaml.automl.automl import AutoML\n", + "from flaml.data import load_openml_dataset\n", + "!jupyter nbextension enable --py widgetsnbextension\n", + "\n", + "X_train, X_test, y_train, y_test = load_openml_dataset(dataset_id=1169, data_dir=\"./\")\n", + "settings = {\n", + " \"time_budget\": 10, # total running time in seconds\n", + " \"metric\": \"accuracy\", # can be: 'r2', 'rmse', 'mae', 'mse', 'accuracy', 'roc_auc', 'roc_auc_ovr',\n", + " # 'roc_auc_ovo', 
'log_loss', 'mape', 'f1', 'ap', 'ndcg', 'micro_f1', 'macro_f1'\n", + " \"task\": \"classification\", # task type\n", + " \"log_file_name\": \"airlines_experiment.log\", # flaml log file\n", + " \"seed\": 7654321, # random seed\n", + "}\n", + "automl = AutoML(**settings)\n", + "automl.fit(X_train=X_train, y_train=y_train)" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "a652c19b24404f6cb9d3f2938b033ec8", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "interactive(children=(Dropdown(description='option', options=('Feature Importance', 'Learning Curve'), value='…" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "automl.visualize()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.16" + }, + "orig_nbformat": 4, + "vscode": { + "interpreter": { + "hash": "aee8b7b246df8f9039afb4144a1f6fd8d2ca17a180786b69acc140d282b71a49" + } + } + }, + "nbformat": 4, + "nbformat_minor": 2 +}