Decisiontree.py import numpy as np import pandas as pd import matplotlib.pyplot as plt import seaborn as sns from sklearn import tree from sklearn.metrics import accuracy_score from sklearn.datasets import load_breast_cancer from sklearn.model_selection import train_test_split from sklearn.tree import DecisionTreeClassifier X,y=load_breast_cancer(return_X_y=True) X_train,X_test,y_train,y_test=train_test_split(X,y,random_state=0) clf=DecisionTreeClassifier(random_state=0) clf.fit(X_train,y_train) y_train_predicted=clf.predict(X_train) y_test_predicted=clf.predict(X_test) train_acc=accuracy_score(y_train,y_train_predicted) test_acc=accuracy_score(y_test,y_test_predicted) print("accuracy of training dataset:",train_acc) print("accuracy of test dataset:",test_acc) plt.figure(figsize=(16,8)) tree.plot_tree(clf) plt.show() # Post-Pruning path=clf.cost_complexity_pruning_path(X_train,y_train) ccp_alphas,impurities=path.ccp_alphas,path.impurities print("ccp alpha wil give list of values :",ccp_alphas) print("***********************************************************") print("Impurities in Decision Tree :",impurities) clfs=[] for ccp_alpha in ccp_alphas: clf=DecisionTreeClassifier(random_state=0,ccp_alpha=ccp_alpha) clf.fit(X_train,y_train) clfs.append(clf) print("Last node in Decision tree is {} and ccp_alpha for last node is {}".format(clfs[- 1].tree_.node_count,ccp_alphas[-1])) train_scores = [clf.score(X_train, y_train) for clf in clfs] test_scores = [clf.score(X_test, y_test) for clf in clfs] fig, ax = plt.subplots() ax.set_xlabel("alpha") ax.set_ylabel("accuracy") ax.set_title("Accuracy vs alpha for training and testing sets") ax.plot(ccp_alphas, train_scores, marker='o', label="train",drawstyle="steps-post") ax.plot(ccp_alphas, test_scores, marker='o', label="test",drawstyle="steps-post") ax.legend() plt.show() clf=DecisionTreeClassifier(random_state=0,ccp_alpha=0.02) clf.fit(X_train,y_train) plt.figure(figsize=(12,8)) tree.plot_tree(clf,rounded=True,filled=True) plt.show() acc=accuracy_score(y_test,clf.predict(X_test)) print("accuracy of post-pruning operation:",acc) clf=DecisionTreeClassifier(criterion= 'gini',max_depth= 17,min_samples_leaf= 3,min_samples_split= 12,splitter= 'random') clf.fit(X_train,y_train) plt.figure(figsize=(20,12)) tree.plot_tree(clf,rounded=True,filled=True) plt.show() y_predicted=clf.predict(X_test) accuracy=accuracy_score(y_test,y_predicted) print("accuracy of pre-pruning operation:",accuracy)