1""" 2==================================================================== 3Plot the decision surfaces of ensembles of trees on the iris dataset 4==================================================================== 5 6Plot the decision surfaces of forests of randomized trees trained on pairs of 7features of the iris dataset. 8 9This plot compares the decision surfaces learned by a decision tree classifier 10(first column), by a random forest classifier (second column), by an extra- 11trees classifier (third column) and by an AdaBoost classifier (fourth column). 12 13In the first row, the classifiers are built using the sepal width and 14the sepal length features only, on the second row using the petal length and 15sepal length only, and on the third row using the petal width and the 16petal length only. 17 18In descending order of quality, when trained (outside of this example) on all 194 features using 30 estimators and scored using 10 fold cross validation, 20we see:: 21 22 ExtraTreesClassifier() # 0.95 score 23 RandomForestClassifier() # 0.94 score 24 AdaBoost(DecisionTree(max_depth=3)) # 0.94 score 25 DecisionTree(max_depth=None) # 0.94 score 26 27Increasing `max_depth` for AdaBoost lowers the standard deviation of 28the scores (but the average score does not improve). 29 30See the console's output for further details about each model. 31 32In this example you might try to: 33 341) vary the ``max_depth`` for the ``DecisionTreeClassifier`` and 35 ``AdaBoostClassifier``, perhaps try ``max_depth=3`` for the 36 ``DecisionTreeClassifier`` or ``max_depth=None`` for ``AdaBoostClassifier`` 372) vary ``n_estimators`` 38 39It is worth noting that RandomForests and ExtraTrees can be fitted in parallel 40on many cores as each tree is built independently of the others. AdaBoost's 41samples are built sequentially and so do not use multiple cores. 
42 43""" 44 45import numpy as np 46import matplotlib.pyplot as plt 47from matplotlib.colors import ListedColormap 48 49from sklearn.datasets import load_iris 50from sklearn.ensemble import ( 51 RandomForestClassifier, 52 ExtraTreesClassifier, 53 AdaBoostClassifier, 54) 55from sklearn.tree import DecisionTreeClassifier 56 57# Parameters 58n_classes = 3 59n_estimators = 30 60cmap = plt.cm.RdYlBu 61plot_step = 0.02 # fine step width for decision surface contours 62plot_step_coarser = 0.5 # step widths for coarse classifier guesses 63RANDOM_SEED = 13 # fix the seed on each iteration 64 65# Load data 66iris = load_iris() 67 68plot_idx = 1 69 70models = [ 71 DecisionTreeClassifier(max_depth=None), 72 RandomForestClassifier(n_estimators=n_estimators), 73 ExtraTreesClassifier(n_estimators=n_estimators), 74 AdaBoostClassifier(DecisionTreeClassifier(max_depth=3), n_estimators=n_estimators), 75] 76 77for pair in ([0, 1], [0, 2], [2, 3]): 78 for model in models: 79 # We only take the two corresponding features 80 X = iris.data[:, pair] 81 y = iris.target 82 83 # Shuffle 84 idx = np.arange(X.shape[0]) 85 np.random.seed(RANDOM_SEED) 86 np.random.shuffle(idx) 87 X = X[idx] 88 y = y[idx] 89 90 # Standardize 91 mean = X.mean(axis=0) 92 std = X.std(axis=0) 93 X = (X - mean) / std 94 95 # Train 96 model.fit(X, y) 97 98 scores = model.score(X, y) 99 # Create a title for each column and the console by using str() and 100 # slicing away useless parts of the string 101 model_title = str(type(model)).split(".")[-1][:-2][: -len("Classifier")] 102 103 model_details = model_title 104 if hasattr(model, "estimators_"): 105 model_details += " with {} estimators".format(len(model.estimators_)) 106 print(model_details + " with features", pair, "has a score of", scores) 107 108 plt.subplot(3, 4, plot_idx) 109 if plot_idx <= len(models): 110 # Add a title at the top of each column 111 plt.title(model_title, fontsize=9) 112 113 # Now plot the decision boundary using a fine mesh as input to a 114 # filled contour plot 115 x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1 116 y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1 117 xx, yy = np.meshgrid( 118 np.arange(x_min, x_max, plot_step), np.arange(y_min, y_max, plot_step) 119 ) 120 121 # Plot either a single DecisionTreeClassifier or alpha blend the 122 # decision surfaces of the ensemble of classifiers 123 if isinstance(model, DecisionTreeClassifier): 124 Z = model.predict(np.c_[xx.ravel(), yy.ravel()]) 125 Z = Z.reshape(xx.shape) 126 cs = plt.contourf(xx, yy, Z, cmap=cmap) 127 else: 128 # Choose alpha blend level with respect to the number 129 # of estimators 130 # that are in use (noting that AdaBoost can use fewer estimators 131 # than its maximum if it achieves a good enough fit early on) 132 estimator_alpha = 1.0 / len(model.estimators_) 133 for tree in model.estimators_: 134 Z = tree.predict(np.c_[xx.ravel(), yy.ravel()]) 135 Z = Z.reshape(xx.shape) 136 cs = plt.contourf(xx, yy, Z, alpha=estimator_alpha, cmap=cmap) 137 138 # Build a coarser grid to plot a set of ensemble classifications 139 # to show how these are different to what we see in the decision 140 # surfaces. 

        # Build a coarser grid to plot a set of ensemble classifications
        # to show how these are different from what we see in the decision
        # surfaces. These points are regularly spaced and do not have a
        # black outline
        xx_coarser, yy_coarser = np.meshgrid(
            np.arange(x_min, x_max, plot_step_coarser),
            np.arange(y_min, y_max, plot_step_coarser),
        )
        Z_points_coarser = model.predict(
            np.c_[xx_coarser.ravel(), yy_coarser.ravel()]
        ).reshape(xx_coarser.shape)
        cs_points = plt.scatter(
            xx_coarser,
            yy_coarser,
            s=15,
            c=Z_points_coarser,
            cmap=cmap,
            edgecolors="none",
        )

        # Plot the training points; these are clustered together and have a
        # black outline
        plt.scatter(
            X[:, 0],
            X[:, 1],
            c=y,
            cmap=ListedColormap(["r", "y", "b"]),
            edgecolor="k",
            s=20,
        )
        plot_idx += 1  # move on to the next plot in sequence

plt.suptitle("Classifiers on feature subsets of the Iris dataset", fontsize=12)
plt.axis("tight")
plt.tight_layout(h_pad=0.2, w_pad=0.2, pad=2.5)
plt.show()
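
# Optional extra, not part of the original figure: a rough sketch of how the
# 10-fold cross-validation scores quoted in the docstring could be recomputed
# on all four features. Exact values depend on the CV splits and random seeds.
from sklearn.model_selection import cross_val_score

for model in models:
    cv_scores = cross_val_score(model, iris.data, iris.target, cv=10)
    print(
        "{} cross-val accuracy: {:.2f} +/- {:.2f}".format(
            type(model).__name__, cv_scores.mean(), cv_scores.std()
        )
    )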