# Decision trees in the information plane: compute I(T;X) and I(Y;T) for every tree
# "layer" (depth level) and compare them to deterministic information-bottleneck curves.
from IPython import get_ipython
get_ipython().run_line_magic('reset', '-sf')  # clear the workspace (soft, no confirmation)

#%%
import pandas as pd
import numpy as np
np.random.seed(seed=42)
import math
import sklearn.datasets
import sklearn.tree
import collections
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.model_selection import GridSearchCV
import tensorflow
import keras
from keras.datasets import mnist
import graphviz
from npeet import entropy_estimators as ee
# from scipy.stats import entropy
import matplotlib.colors as col
import matplotlib.pyplot as plt
import tikzplotlib as tikz  # tikz.save('plot.tex')
import seaborn as sns
sns.set_theme()


def generate_2D_data(k):
    """Deterministic 2D toy problem: an n x n grid labelled +1 above the diagonal,
    -1 below it, and alternating on the diagonal itself."""
    n = 2 ** k
    N = n ** 2
    print('nb. of samples = ' + str(N))
    X = []
    Y = []
    for i in np.arange(1, n + 1):
        for j in np.arange(1, n + 1):
            X.append([i, j])
            if i > j:
                Y.append(1)
            elif i < j:
                Y.append(-1)
            else:
                if i % 2 == 0:
                    Y.append(1)
                else:
                    Y.append(-1)
    X = np.array(X)
    Y = np.array(Y)
    return [X, Y]


def compute_nodes_info(X, Y, clf):
    """For every node of the fitted tree, compute its probability mass ('proba') and
    its contributions to the layer entropy H(T) ('h1') and to the conditional
    entropy H(T|Y) ('h2')."""
    N = len(X)            # nb. of samples
    labels = list(set(Y))
    C = len(labels)       # nb. of classes

    # Computing p(y)
    labels_nb = np.zeros(C)
    for i in Y:
        for c in range(C):
            if i == labels[c]:
                labels_nb[c] += 1

    # depths = get_node_depths(clf.tree_)
    n_nodes = clf.tree_.node_count

    # collecting samples from each node
    # from: https://stackoverflow.com/questions/45398737/is-there-any-way-to-get-samples-under-each-leaf-of-a-decision-tree
    samples = collections.defaultdict(list)
    dec_paths = clf.decision_path(X)
    for d, dec in enumerate(dec_paths):
        for i in range(n_nodes):
            if dec.toarray()[0][i] == 1:
                samples[i].append(d)

    df_nodes = pd.DataFrame(index=range(n_nodes), columns=['proba', 'h1', 'h2'])
    for n in range(n_nodes):
        n_samples_id = samples[n]            # id of the samples in node n
        proba_node = len(n_samples_id) / N   # probability to belong to node n

        # Computing the node's contribution to the layer's entropy H(T)
        df_nodes.loc[n, 'proba'] = proba_node
        if proba_node > 0:
            df_nodes.loc[n, 'h1'] = - proba_node * math.log2(proba_node)
        else:
            df_nodes.loc[n, 'h1'] = 0

        # Build a partition of the samples in the node by separating them by class
        class_partition = {c: [] for c in range(C)}
        for s in n_samples_id:
            for c in range(C):
                if Y[s] == labels[c]:
                    class_partition[c].append(s)
                    break

        # Computing the node's contribution to the layer's conditional entropy H(T|Y)
        h2 = 0
        for c in range(C):
            proba_node_given_y = len(class_partition[c]) / labels_nb[c]
            # print(proba_node_given_y)
            if proba_node_given_y > 0:
                h2 += - proba_node_given_y * labels_nb[c] / N * math.log2(proba_node_given_y)
        df_nodes.loc[n, 'h2'] = h2

    return df_nodes
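
# Illustrative sketch (added for clarity, not part of the experiments): how the
# per-node terms above are meant to be aggregated. Summing 'h1' over the nodes of one
# layer gives that layer's entropy H(T) = I(T;X) (the tree is a deterministic encoder
# of X), and summing 'h1' - 'h2' gives I(Y;T) = H(T) - H(T|Y). The helper below is a
# minimal usage sketch on the toy grid; the depth-2 tree is an arbitrary demo choice.
def demo_node_terms():
    X_demo, Y_demo = generate_2D_data(2)
    clf_demo = sklearn.tree.DecisionTreeClassifier(criterion="entropy", max_depth=2,
                                                   random_state=42)
    clf_demo.fit(X_demo, Y_demo)
    df_demo = compute_nodes_info(X_demo, Y_demo, clf_demo)
    # The leaves partition the samples, so their probabilities must sum to 1.
    leaf_ids = sorted(set(clf_demo.apply(X_demo)))
    print('sum of leaf probabilities =', df_demo.loc[leaf_ids, 'proba'].sum())
    return df_demo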

def get_layers(X, Y, clf):
    """Group the tree nodes by depth. A leaf above the maximal depth is propagated to
    every deeper layer, so each layer is a full partition of the samples."""
    ### from https://scikit-learn.org/stable/auto_examples/tree/plot_unveil_tree_structure.html
    n_nodes = clf.tree_.node_count
    children_left = clf.tree_.children_left
    children_right = clf.tree_.children_right
    feature = clf.tree_.feature
    threshold = clf.tree_.threshold

    node_depth = np.zeros(shape=n_nodes, dtype=np.int64)
    is_leaves = np.zeros(shape=n_nodes, dtype=bool)
    stack = [(0, 0)]  # start with the root node id (0) and its depth (0)
    while len(stack) > 0:
        # `pop` ensures each node is only visited once
        node_id, depth = stack.pop()
        node_depth[node_id] = depth

        # If the left and right child of a node is not the same we have a split node
        is_split_node = children_left[node_id] != children_right[node_id]
        # If a split node, append left and right children and depth to `stack`
        # so we can loop through them
        if is_split_node:
            stack.append((children_left[node_id], depth + 1))
            stack.append((children_right[node_id], depth + 1))
        else:
            is_leaves[node_id] = True
    ###

    K = max(node_depth)  # maximal depth of the tree
    layers = {x: [] for x in range(K + 1)}
    for n in range(n_nodes):
        d = node_depth[n]
        layers[d].append(n)
        if is_leaves[n]:
            for l in np.arange(d + 1, K + 1):
                layers[l].append(n)
    return layers


def compute_layers_info(df_nodes, layers, X, Y, clf):
    """Aggregate the per-node terms into per-layer quantities: I(T;X), H(T|Y), I(Y;T)
    and the normalised coordinates Del_G = I(Y;T)/H(Y), Del_C = I(T;X)/H(Y)."""
    K = len(layers)
    df_layers = pd.DataFrame(index=range(K),
                             columns=['I_TX', 'H(T|Y)', 'I_YT', 'Del_G', 'Del_C', 'H_Y'])
    h_y = ee.entropyd(Y)
    df_layers['H_Y'] = h_y
    for l in layers:
        I_TX = 0
        I_YT = 0
        H_TY = 0
        for n in layers[l]:
            I_TX += df_nodes.loc[n, 'h1']
            H_TY += df_nodes.loc[n, 'h2']
            I_YT += df_nodes.loc[n, 'h1'] - df_nodes.loc[n, 'h2']
        df_layers.loc[l, 'I_TX'] = I_TX
        df_layers.loc[l, 'H(T|Y)'] = H_TY
        df_layers.loc[l, 'I_YT'] = I_YT
        df_layers.loc[l, 'Del_G'] = I_YT / h_y
        df_layers.loc[l, 'Del_C'] = I_TX / h_y

    # adding the last layer (class prediction)
    last_layer = clf.predict(X)
    df_layers.loc[K, 'I_TX'] = ee.midd(last_layer, X)
    df_layers.loc[K, 'I_YT'] = ee.midd(Y, last_layer)
    df_layers.loc[K, 'Del_G'] = df_layers.loc[K, 'I_YT'] / h_y
    df_layers.loc[K, 'Del_C'] = df_layers.loc[K, 'I_TX'] / h_y
    return df_layers


def get_deterministic_IB_curve(H_input, H_output):
    """Upper boundary of the information plane for deterministic encoders,
    evaluated on a 100-point complexity scale from 0 to H_input."""
    complexity_scale = np.linspace(start=0, stop=H_input, num=100)
    IB_curve = []
    for i in complexity_scale:
        if i <= H_output:
            IB_curve.append(i)
        else:
            IB_curve.append(H_output)
    return np.array(IB_curve)
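
# For a deterministic encoder T = f(X) we have I(T;X) = H(T), so the achievable region
# is bounded above by I(Y;T) = min(I(T;X), H(Y)): the curve rises with slope 1 up to
# H(Y) and is flat afterwards. Minimal usage sketch (the 3-bit / 1-bit values below are
# arbitrary and only for illustration):
#   curve = get_deterministic_IB_curve(H_input=3.0, H_output=1.0)
#   plt.plot(np.linspace(0, 3.0, 100), curve)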

def plot_info_plane(df_layers, data_name, pruned=False, full_set=False):
    [X, Y, X_train, Y_train, X_test, Y_test, features, labels] = get_data(data_name)

    # get prediction rep
    n_layers = len(df_layers['I_YT']) - 1
    # pred_layer = [I_TX, H(T|Y), I_YT, Del_G, Del_C, H_Y] of the prediction layer
    pred_layer = np.ravel(df_layers.iloc[-1:].to_numpy())
    # get other representations
    inside_rep = df_layers.iloc[:-1]

    # Full set IB curve
    h_x = 8  # math.log2(X.shape[0])
    h_y = ee.entropyd(Y)
    IB_curve = get_deterministic_IB_curve(h_x, h_y)

    # Train set IB curve
    h_y_train = df_layers.loc[0, 'H_Y']
    IB_curve_train = get_deterministic_IB_curve(h_x, h_y_train)

    # Get the I_Y of IB_test for comparison
    h_y_test = ee.entropyd(Y_test)
    print("IB_full I_Y at : " + str(h_y))
    print("IB_train I_Y at : " + str(h_y_train))
    print("IB_test I_Y at : " + str(h_y_test))

    complexity_scale = np.linspace(start=0, stop=h_x, num=100)

    # plot the information plane
    sns.set_theme()
    fig, ax = plt.subplots()
    ax.plot(complexity_scale, IB_curve, color="b", linewidth=2.5,
            label='$IB_{full}$', zorder=2)
    ax.plot(complexity_scale, IB_curve_train, color="black", linewidth=2,
            dashes=(4, 6), label='$IB_{train}$', zorder=2)
    ax.scatter(inside_rep['I_TX'], inside_rep['I_YT'],
               c=range(len(inside_rep['I_TX'])), cmap='viridis',
               s=100, alpha=1, zorder=3)
    ax.scatter(pred_layer[0], pred_layer[2], color='darkorange', marker='*',
               s=250, alpha=1, zorder=3)
    ax.set_xlabel('$I(T;X)$', fontsize=20)  # Add an x-label to the axes.
    ax.set_ylabel('$I(Y;T)$', fontsize=20)  # Add a y-label to the axes.
    # ax.tick_params(axis='both', which='major', labelsize=18)
    ax.legend(fontsize=20, loc=4)
    plt.tight_layout()
    # tikz.save('Figures/IP/' + data_name + '_IP.tex')
    if pruned == True:
        plt.savefig('Figures/IP/' + data_name + '_IP_pruned.png')
    elif full_set == True:
        plt.savefig('Figures/IP/' + data_name + '_IP_fullset.png')
    else:
        plt.savefig('Figures/IP/' + data_name + '_IP.png')


def plot_info_plane_train_old(df_layers, data_name, badfit=False):
    [X, Y, X_train, Y_train, X_test, Y_test, features, labels] = get_data(data_name)

    n_layers = len(df_layers['I_YT']) - 1
    pred_layer = np.ravel(df_layers.iloc[-1:].to_numpy())

    # Full set IB curve
    h_x = 8
    h_y = df_layers.loc[0, 'H_Y']
    IB_curve = get_deterministic_IB_curve(h_x, h_y)
    complexity_scale = np.linspace(start=0, stop=h_x, num=100)

    # plot all layers except prediction layer
    g = sns.relplot(data=df_layers.iloc[:-1], x="I_TX", y="I_YT",
                    hue=np.arange(0, n_layers), palette="viridis", s=130, legend=False)
    # plot prediction layer
    g.ax.scatter(pred_layer[0], pred_layer[2], color='darkorange', marker='*', s=180)
    # plot IB curve
    g.ax.plot(complexity_scale, IB_curve, color="b")
    g.set_axis_labels("I(T;X)", "I(Y;T)", labelpad=10, size=15)
    g.fig.set_size_inches(6.5, 4)
    # tikz.save('Figures/' + data_name + '_IP.tex')
    if badfit == True:
        plt.savefig('Figures/IP/' + data_name + '_IP_badfit.png')
    else:
        plt.savefig('Figures/IP/' + data_name + '_IP_goodfit.png')
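
# Note: plot_info_plane_train_old() is kept for reference only; none of the experiment
# drivers below call it. The experiments use plot_info_plane_train() (next) for the
# data-fit experiments and plot_info_plane() for the generalization experiments.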

def plot_info_plane_train(df_layers, data_name, badfit=False):
    [X, Y, X_train, Y_train, X_test, Y_test, features, labels] = get_data(data_name)

    # get prediction rep
    n_layers = len(df_layers['I_YT']) - 1
    pred_layer = np.ravel(df_layers.iloc[-1:].to_numpy())
    # get other representations
    inside_rep = df_layers.iloc[:-1]

    # Full set IB curve
    h_x = 8  # math.log2(X.shape[0])
    h_y = df_layers.loc[0, 'H_Y']
    IB_curve = get_deterministic_IB_curve(h_x, h_y)
    complexity_scale = np.linspace(start=0, stop=h_x, num=100)

    # plot the information plane
    sns.set_theme()
    fig, ax = plt.subplots()
    ax.plot(complexity_scale, IB_curve, color="black", linewidth=2, dashes=(4, 6),
            label='$IB_{train}$', zorder=2)
    ax.scatter(inside_rep['I_TX'], inside_rep['I_YT'],
               c=range(len(inside_rep['I_TX'])), cmap='viridis',
               s=100, alpha=1, zorder=3)
    ax.scatter(pred_layer[0], pred_layer[2], color='darkorange', marker='*',
               s=250, alpha=1, zorder=3)
    ax.set_xlabel('$I(T;X)$', fontsize=20)  # Add an x-label to the axes.
    ax.set_ylabel('$I(Y;T)$', fontsize=20)  # Add a y-label to the axes.
    # ax.tick_params(axis='both', which='major', labelsize=18)
    ax.legend(fontsize=20, loc=4)
    plt.tight_layout()
    # tikz.save('Figures/' + data_name + '_IP.tex')
    if badfit == True:
        plt.savefig('Figures/IP/' + data_name + '_IP_badfit.png')
    else:
        plt.savefig('Figures/IP/' + data_name + '_IP_goodfit.png')


def get_data(name):
    if name == 'MNIST':
        (X_train, Y_train), (X_test, Y_test) = mnist.load_data()
        X_train = np.reshape(X_train, (X_train.shape[0], X_train.shape[1] * X_train.shape[1]))
        X_test = np.reshape(X_test, (X_test.shape[0], X_test.shape[1] * X_test.shape[1]))
        X = np.vstack((X_train, X_test))
        Y = np.hstack((Y_train, Y_test))
        # X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=2/3, random_state=45)
        data_features = None  # np.arange(0, X.shape[1])
        data_labels = [str(lab) for lab in range(10)]
        return [X, Y, X_train, Y_train, X_test, Y_test, data_features, data_labels]

    elif name == 'MNIST_trainIB_below':
        (X_train, Y_train), (X_test, Y_test) = mnist.load_data()
        X_train = np.reshape(X_train, (X_train.shape[0], X_train.shape[1] * X_train.shape[1]))
        X_test = np.reshape(X_test, (X_test.shape[0], X_test.shape[1] * X_test.shape[1]))
        X = np.vstack((X_train, X_test))
        Y = np.hstack((Y_train, Y_test))
        X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.999, random_state=45)
        data_features = None  # np.arange(0, X.shape[1])
        data_labels = [str(lab) for lab in range(10)]
        return [X, Y, X_train, Y_train, X_test, Y_test, data_features, data_labels]

    elif name == 'MNIST_small':
        n_train = 3000
        n_test = 1000
        (X_train, Y_train), (X_test, Y_test) = mnist.load_data()
        X_train = np.reshape(X_train, (X_train.shape[0], X_train.shape[1] * X_train.shape[1]))
        X_test = np.reshape(X_test, (X_test.shape[0], X_test.shape[1] * X_test.shape[1]))
        X_train = X_train[:n_train]
        Y_train = Y_train[:n_train]
        X_test = X_test[:n_test]
        Y_test = Y_test[:n_test]
        X = np.vstack((X_train, X_test))
        Y = np.hstack((Y_train, Y_test))
        data_features = None  # np.arange(0, X.shape[1])
        data_labels = [str(lab) for lab in range(10)]
        return [X, Y, X_train, Y_train, X_test, Y_test, data_features, data_labels]

    elif name == '2D_noisy':
        np.random.seed(seed=42)
        N = 150
        X = np.zeros((N * N, 2))
        X[:, 0] = np.repeat(np.arange(0, 1, 1 / N), N)
        X[:, 1] = np.concatenate([np.arange(0, 1, 1 / N)] * N)
        X = X - X.mean(axis=0)
        Z = X + np.random.normal(0, 0.06, size=X.shape)
        Y = np.zeros((N * N, 2))
        Y[np.square(Z - 0.5).sum(1) > 0.2, 0] = 1
        Y[:, 1] = 1 - Y.sum(1)
        Y = Y[:, 0]
        X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=1/7, random_state=42)
        data_features = None
        data_labels = ['Black', 'Copper']
        return [X, Y, X_train, Y_train, X_test, Y_test, data_features, data_labels]

    elif name == '2D_noisy_trainIB_below':
        np.random.seed(seed=42)
        N = 150
        X = np.zeros((N * N, 2))
        X[:, 0] = np.repeat(np.arange(0, 1, 1 / N), N)
        X[:, 1] = np.concatenate([np.arange(0, 1, 1 / N)] * N)
        X = X - X.mean(axis=0)
        Z = X + np.random.normal(0, 0.06, size=X.shape)
        Y = np.zeros((N * N, 2))
        Y[np.square(Z - 0.5).sum(1) > 0.2, 0] = 1
        Y[:, 1] = 1 - Y.sum(1)
        Y = Y[:, 0]
        X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.99, random_state=5)
        data_features = None
        data_labels = ['Black', 'Copper']
        return [X, Y, X_train, Y_train, X_test, Y_test, data_features, data_labels]

    elif name == '2D_noisy_trainIB_above':
        np.random.seed(seed=42)
        N = 150
        X = np.zeros((N * N, 2))
        X[:, 0] = np.repeat(np.arange(0, 1, 1 / N), N)
        X[:, 1] = np.concatenate([np.arange(0, 1, 1 / N)] * N)
        X = X - X.mean(axis=0)
        Z = X + np.random.normal(0, 0.06, size=X.shape)
        Y = np.zeros((N * N, 2))
        Y[np.square(Z - 0.5).sum(1) > 0.2, 0] = 1
        Y[:, 1] = 1 - Y.sum(1)
        Y = Y[:, 0]
        X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.99, random_state=1)
        data_features = None
        data_labels = ['Black', 'Copper']
        return [X, Y, X_train, Y_train, X_test, Y_test, data_features, data_labels]
    elif name == 'iris':
        iris = sklearn.datasets.load_iris()
        X = iris.data
        Y = iris.target
        data_features = iris.feature_names
        data_labels = iris.target_names
        X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=1/7, random_state=42)
        return [X, Y, X_train, Y_train, X_test, Y_test, data_features, data_labels]

    elif name == 'tennis':
        df = pd.read_csv('playTennis.csv', header=0, quotechar='"', sep=',',
                         na_values=['na', '-', '.', ''])
        df = df.dropna()
        df = df.astype('category')
        # Swap the contents of 'Class' and 'wind' (use .to_numpy() so pandas does not
        # align on column names and silently turn the swap into a no-op), then swap the
        # column names so that 'Class' ends up as the last column.
        df[['Class', 'wind']] = df[['wind', 'Class']].to_numpy()
        df = df.rename(columns={'wind': 'Class', 'Class': 'wind'})
        df = df.astype('category')
        data_features = df.columns.tolist()[:-1]
        data_labels = list(df['Class'].cat.categories)
        # outlook = [overcast:0, rain:1, sunny:2]
        df['outlook'] = df['outlook'].cat.rename_categories([0, 1, 2])
        # temperature = [cool:0, hot:1, mild:2]
        df['temperature'] = df['temperature'].cat.rename_categories([0, 1, 2])
        # humidity = [high:0, normal:1]
        df['humidity'] = df['humidity'].cat.rename_categories([0, 1])
        # wind = [strong:0, weak:1]
        df['wind'] = df['wind'].cat.rename_categories([0, 1])
        # Class = [no:0, yes:1]
        df['Class'] = df['Class'].cat.rename_categories([0, 1])
        Y = df[df.columns[-1]].to_numpy()
        X = df[df.columns[:-1]].to_numpy()
        X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=1/3, random_state=42)
        return [X, Y, X_train, Y_train, X_test, Y_test, data_features, data_labels]

    elif name == '2D':
        # NOTE: this branch returns only [X, Y] (no train/test split), so it cannot be
        # unpacked like the other branches.
        return generate_2D_data(5)


def get_best_alpha_old(clf, data_name, max_depth=None):
    # source: https://scikit-learn.org/stable/auto_examples/tree/plot_cost_complexity_pruning.html#sphx-glr-auto-examples-tree-plot-cost-complexity-pruning-py
    [X, Y, X_train, Y_train, X_test, Y_test, features, labels] = get_data(data_name)

    # Get all effective alphas
    path = clf.cost_complexity_pruning_path(X_train, Y_train)
    ccp_alphas = path.ccp_alphas

    # find the best alpha with the test set (limit if too many values)
    if data_name == 'MNIST' and len(ccp_alphas) > 10:
        ccp_alphas = np.linspace(ccp_alphas[0], ccp_alphas[-1] / 50, 20)
    elif data_name == 'MNIST_small' and len(ccp_alphas) > 15:
        ccp_alphas = np.linspace(ccp_alphas[0], ccp_alphas[-1] / 6, 100)

    # train trees with different alpha values
    clfs = []
    k = 0
    for ccp_alpha in ccp_alphas:
        print("alpha value nb : " + str(k) + '/' + str(len(ccp_alphas)))
        k += 1
        clf = sklearn.tree.DecisionTreeClassifier(random_state=42, max_depth=max_depth,
                                                  criterion="entropy", ccp_alpha=ccp_alpha)
        clf.fit(X_train, Y_train)
        clfs.append(clf)

    train_scores = [clf.score(X_train, Y_train) for clf in clfs]
    test_scores = [clf.score(X_test, Y_test) for clf in clfs]

    fig, ax = plt.subplots()
    ax.set_xlabel("alpha")
    ax.set_ylabel("accuracy")
    ax.set_title("Accuracy vs alpha for training and testing sets")
    ax.plot(ccp_alphas, train_scores, marker='o', label="train", drawstyle="steps-post")
    ax.plot(ccp_alphas, test_scores, marker='o', label="test", drawstyle="steps-post")
    ax.legend()
    plt.show()

    best_alpha = ccp_alphas[np.argmax(test_scores)]
    return best_alpha
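
# Note on the two alpha-selection helpers: get_best_alpha_old() picks the ccp_alpha
# that maximises accuracy on the held-out test set (np.argmax(test_scores)), which
# leaks test information into model selection; get_best_alpha() below instead runs a
# 5-fold GridSearchCV on the training split and only reports test accuracy afterwards.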

def get_best_alpha(clf, data_name, max_depth=None):
    [X, Y, X_train, Y_train, X_test, Y_test, features, labels] = get_data(data_name)

    # Get all effective alphas
    path = clf.cost_complexity_pruning_path(X_train, Y_train)
    ccp_alphas = path.ccp_alphas

    # find the best alpha by cross-validation (limit the grid if too many values)
    if data_name == 'MNIST':
        ccp_alphas = np.linspace(ccp_alphas[0], ccp_alphas[-1] / 50, 20)
    elif data_name == 'MNIST_small':
        ccp_alphas = np.linspace(ccp_alphas[0], ccp_alphas[-1] / 6, 100)
    elif data_name == '2D_noisy':
        ccp_alphas = np.linspace(ccp_alphas[0], ccp_alphas[-1] / 20, 20)

    parameters = {'ccp_alpha': ccp_alphas}
    model = sklearn.tree.DecisionTreeClassifier(random_state=42, max_depth=max_depth,
                                                criterion="entropy")
    best_clf = GridSearchCV(model, parameters, n_jobs=4, verbose=4)  # default 5 k-folds
    best_clf.fit(X_train, Y_train)
    tree_model = best_clf.best_estimator_
    print(best_clf.best_score_, best_clf.best_params_)

    # accuracy on training, test and full set
    pred_train = best_clf.predict(X_train)
    pred_test = best_clf.predict(X_test)
    pred_full = best_clf.predict(X)
    print("accuracy on training set = " + str(accuracy_score(Y_train, pred_train)))
    print("accuracy on test set = " + str(accuracy_score(Y_test, pred_test)))
    print("accuracy on whole set = " + str(accuracy_score(Y, pred_full)))

    return tree_model


def fit_tree(data_name, max_depth=None, alpha=0, pruned=False, full_set=False, plot_tree=False):
    [X, Y, X_train, Y_train, X_test, Y_test, features, labels] = get_data(data_name)

    clf = sklearn.tree.DecisionTreeClassifier(random_state=42, max_depth=max_depth,
                                              criterion="entropy", ccp_alpha=alpha)
    # Check if we train the model on the full set or on the training set
    if full_set == True:
        clf.fit(X, Y)
    else:
        clf.fit(X_train, Y_train)

    # accuracy on training, test and full set
    pred_train = clf.predict(X_train)
    pred_test = clf.predict(X_test)
    pred_full = clf.predict(X)
    print("accuracy on training set = " + str(accuracy_score(Y_train, pred_train)))
    print("accuracy on test set = " + str(accuracy_score(Y_test, pred_test)))
    print("accuracy on whole set = " + str(accuracy_score(Y, pred_full)))

    # if toy dataset, plot and save the sample space and the learned partition
    if full_set == True:
        if data_name == '2D_noisy':
            N = 150
            sns.set_style("dark")
            # plot sample space
            plt.figure(figsize=(8, 8))
            plt.imshow(Y.reshape((N, N)), cmap='copper')
            plt.tick_params(labelbottom=False, labelleft=False)
            plt.savefig('Figures/partitions/' + data_name + '_space.png')
            # plot partition
            plt.figure(figsize=(8, 8))
            plt.imshow(pred_full.reshape((N, N)), cmap='copper')
            plt.tick_params(labelbottom=False, labelleft=False)
            if max_depth == None:
                plt.savefig('Figures/partitions/' + data_name + '_space_goodfit.png')
            elif max_depth == 5:
                plt.savefig('Figures/partitions/' + data_name + '_space_badfit.png')
            plt.show()

    # possible to plot the tree representation also
    if plot_tree == True:
        # save tree figure
        dot_data = sklearn.tree.export_graphviz(clf, out_file=None, filled=True,
                                                rounded=True, special_characters=True,
                                                feature_names=features, class_names=labels,
                                                leaves_parallel=True, proportion=False,
                                                rotate=True)
        graph = graphviz.Source(dot_data)
        graph.format = 'png'
        if full_set == True:
            if max_depth == None:
                graph.render('Figures/trees/' + data_name + '_tree_goodfit')
            elif max_depth == 5:
                graph.render('Figures/trees/' + data_name + '_tree_badfit')
                # graph.view('Figures/trees/' + data_name + '_tree_fullfit')
        else:
            if pruned == False:
                # graph.render('Figures/trees/' + data_name + '_tree_trainfit')
                graph.format = 'png'
                graph.render('Figures/trees/' + data_name + '_tree_trainfit')
                # graph.view('Figures/trees/' + data_name + '_tree_trainfit')
            elif pruned == True:
                # graph.render('Figures/trees/' + data_name + '_tree_trainfit_pruned')
                graph.format = 'png'
                graph.render('Figures/trees/' + data_name + '_tree_trainfit_pruned')
                # graph.view('Figures/trees/' + data_name + '_tree_trainfit_pruned')

    return clf
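
# Usage sketch (these are exactly the calls made by the experiment drivers below):
#   model = fit_tree('2D_noisy')                           # fit on the training split
#   model = fit_tree('2D_noisy', full_set=True)            # fit on the full set
#   model = fit_tree('MNIST', max_depth=5, full_set=True)  # deliberately imperfect fit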

def run_generalization_exp(data_name, experiment=0):

    # EXP I (a) : IB_train < IB_full
    if experiment == 1.1:
        # Get the modified datasets
        if data_name == '2D_noisy':
            data_name = '2D_noisy_trainIB_below'
        elif data_name == 'MNIST':
            data_name = 'MNIST_trainIB_below'
        else:
            print('can not run for this dataset')
            return 0
        # Fit a decision tree to the training set
        model = fit_tree(data_name)
        # Compute layer information content and plot it on the information plane
        [X, Y, X_train, Y_train, X_test, Y_test, features, labels] = get_data(data_name)
        df_nodes = compute_nodes_info(X_train, Y_train, model)
        layers = get_layers(X_train, Y_train, model)
        df_layers = compute_layers_info(df_nodes, layers, X_train, Y_train, model)
        plot_info_plane(df_layers, data_name)
        return df_layers

    # EXP I (b) : IB_train > IB_full
    elif experiment == 1.2:
        # Get the modified datasets
        if data_name == '2D_noisy':
            data_name = '2D_noisy_trainIB_above'
        elif data_name == 'MNIST':
            # NOTE: get_data() defines no 'MNIST_trainIB_above' branch, so this case
            # cannot currently be run on MNIST.
            data_name = 'MNIST_trainIB_above'
        else:
            print('can not run for this dataset')
            return 0
        # Fit a decision tree to the training set
        model = fit_tree(data_name)
        # Compute layer information content and plot it on the information plane
        [X, Y, X_train, Y_train, X_test, Y_test, features, labels] = get_data(data_name)
        df_nodes = compute_nodes_info(X_train, Y_train, model)
        layers = get_layers(X_train, Y_train, model)
        df_layers = compute_layers_info(df_nodes, layers, X_train, Y_train, model)
        plot_info_plane(df_layers, data_name)
        return df_layers

    # EXP II (a) : IB_train = IB_full with training set
    elif experiment == 2.1:
        # Fit a decision tree to the training set
        model = fit_tree(data_name)
        # Compute layer information content and plot it on the information plane
        [X, Y, X_train, Y_train, X_test, Y_test, features, labels] = get_data(data_name)
        df_nodes = compute_nodes_info(X_train, Y_train, model)
        layers = get_layers(X_train, Y_train, model)
        df_layers = compute_layers_info(df_nodes, layers, X_train, Y_train, model)
        plot_info_plane(df_layers, data_name)
        return df_layers

    # EXP II (b) : IB_train = IB_full with full set
    elif experiment == 2.2:
        # Fit a decision tree to the full set
        model = fit_tree(data_name, full_set=True)
        # Compute layer information content and plot it on the information plane
        [X, Y, X_train, Y_train, X_test, Y_test, features, labels] = get_data(data_name)
        df_nodes = compute_nodes_info(X, Y, model)
        layers = get_layers(X, Y, model)
        df_layers = compute_layers_info(df_nodes, layers, X, Y, model)
        plot_info_plane(df_layers, data_name, full_set=True)
        return df_layers

    # EXP III : IB_train = IB_full pruned
    elif experiment == 3:
        # Fit a decision tree to the training set
        model = fit_tree(data_name)
        # Compute layer information content and plot it on the information plane
        [X, Y, X_train, Y_train, X_test, Y_test, features, labels] = get_data(data_name)
        df_nodes = compute_nodes_info(X_train, Y_train, model)
        layers = get_layers(X_train, Y_train, model)
        df_layers = compute_layers_info(df_nodes, layers, X_train, Y_train, model)
        plot_info_plane(df_layers, data_name)

        # test multiple values of alpha
        pruned_model = get_best_alpha(model, data_name)
        # print('\n Alpha value equals to : ' + str(best_alpha))
        # if alpha is different from zero, fit a new pruned model
        # if best_alpha != 0:
        #     pruned_model = fit_tree(data_name, alpha=best_alpha, pruned=True)

        # Compute layer information content and plot it on the information plane
        df_nodes_pruned = compute_nodes_info(X_train, Y_train, pruned_model)
        layers_pruned = get_layers(X_train, Y_train, pruned_model)
        df_layers_pruned = compute_layers_info(df_nodes_pruned, layers_pruned,
                                               X_train, Y_train, pruned_model)
        plot_info_plane(df_layers_pruned, data_name, pruned=True)
        return [df_layers, df_layers_pruned]
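
# Usage sketch (see the experiment cells at the bottom of the file): experiment codes
# are 1.1 (IB_train below IB_full), 1.2 (IB_train above IB_full), 2.1 / 2.2
# (IB_train ~ IB_full, fit on the training set / full set) and 3 (pruning), e.g.
#   df_layers = run_generalization_exp('2D_noisy', 2.1)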

def run_datafit_exp(data_name, experiment=0):

    # EXP I : perfect fit
    if experiment == 1:
        # Fit a decision tree to the full set
        model = fit_tree(data_name, full_set=True)
        # Compute layer information content and plot it on the information plane
        [X, Y, X_train, Y_train, X_test, Y_test, features, labels] = get_data(data_name)
        df_nodes = compute_nodes_info(X, Y, model)
        layers = get_layers(X, Y, model)
        df_layers = compute_layers_info(df_nodes, layers, X, Y, model)
        plot_info_plane_train(df_layers, data_name, badfit=False)
        return df_layers

    # EXP II : imperfect fit
    elif experiment == 2:
        # Fit a depth-limited decision tree to the full set
        model = fit_tree(data_name, max_depth=5, full_set=True)
        # Compute layer information content and plot it on the information plane
        [X, Y, X_train, Y_train, X_test, Y_test, features, labels] = get_data(data_name)
        df_nodes = compute_nodes_info(X, Y, model)
        layers = get_layers(X, Y, model)
        df_layers = compute_layers_info(df_nodes, layers, X, Y, model)
        plot_info_plane_train(df_layers, data_name, badfit=True)
        return df_layers


#%% Exp gen 1.1 NOISY
##############################################
# Toy generalization experiments
##############################################
data = '2D_noisy'
exp = 1.1
gen_noisy_11 = run_generalization_exp(data, exp)
"""
accuracy on training set = 1.0
accuracy on test set = 0.9577553310886644
accuracy on whole set = 0.9581777777777778
IB_full I_Y at : 0.6277840849515826
IB_train I_Y at : 0.5164490482843502
IB_test I_Y at : 0.6288039502126452
"""

#%% Exp gen 1.2 NOISY
data = '2D_noisy'
exp = 1.2
gen_noisy_12 = run_generalization_exp(data, exp)
"""
accuracy on training set = 1.0
accuracy on test set = 0.94334455667789
accuracy on whole set = 0.9439111111111111
IB_full I_Y at : 0.6277840849515826
IB_train I_Y at : 0.7037900402465797
IB_test I_Y at : 0.6269561300295622
"""

#%% Exp gen 2.1 NOISY
data = '2D_noisy'
exp = 2.1
gen_noisy_21 = run_generalization_exp(data, exp)
"""
accuracy on training set = 1.0
accuracy on test set = 0.9489891135303266
accuracy on whole set = 0.9927111111111111
IB_full I_Y at : 0.6277840849515826
IB_train I_Y at : 0.6286227149906515
IB_test I_Y at : 0.6227260846252679
"""

#%% Exp gen 2.2 NOISY
data = '2D_noisy'
exp = 2.2
gen_noisy_22 = run_generalization_exp(data, exp)
"""
accuracy on training set = 1.0
accuracy on test set = 1.0
accuracy on whole set = 1.0
IB_full I_Y at : 0.6277840849515826
IB_train I_Y at : 0.6277840849515826
IB_test I_Y at : 0.6227260846252679
"""
#%% Exp gen 3 NOISY
data = '2D_noisy'
exp = 3
gen_noisy_3 = run_generalization_exp(data, exp)
"""
### non pruned :
accuracy on training set = 1.0
accuracy on test set = 0.9489891135303266
accuracy on whole set = 0.9927111111111111
IB_full I_Y at : 0.6277840849515826
IB_train I_Y at : 0.6286227149906515
IB_test I_Y at : 0.6227260846252679
Fitting 5 folds for each of 20 candidates, totalling 100 fits
[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done 17 tasks | elapsed: 5.3s
[Parallel(n_jobs=4)]: Done 100 out of 100 | elapsed: 5.8s finished
0.9626134301270417 {'ccp_alpha': 0.0010767609949358314}
accuracy on training set = 0.9640134819808142
accuracy on test set = 0.9595645412130638
accuracy on whole set = 0.9633777777777778
IB_full I_Y at : 0.6277840849515826
IB_train I_Y at : 0.6286227149906515
IB_test I_Y at : 0.6227260846252679
"""

#%% Exp gen 1.1 MNIST
##############################################
# MNIST generalization experiments
##############################################
data = 'MNIST'
exp = 1.1
gen_mnist_11 = run_generalization_exp(data, exp)
"""
accuracy on training set = 1.0
accuracy on test set = 0.4004862004862005
accuracy on whole set = 0.40108571428571427
IB_full I_Y at : 3.3198370254034137
IB_train I_Y at : 3.1383843022755262
IB_test I_Y at : 3.319855546026464
"""

#%% Exp gen 1.2 MNIST
# NOTE: requires a 'MNIST_trainIB_above' branch in get_data(), which is not defined above.
data = 'MNIST'
exp = 1.2
gen_mnist_12 = run_generalization_exp(data, exp)

#%% Exp gen 2.1 MNIST
data = 'MNIST'
exp = 2.1
gen_mnist_21 = run_generalization_exp(data, exp)
"""
accuracy on training set = 1.0
accuracy on test set = 0.8855
accuracy on whole set = 0.9836428571428572
IB_full I_Y at : 3.3198370254034137
IB_train I_Y at : 3.3198709267551885
IB_test I_Y at : 3.3194225261208263
"""

#%% Exp gen 2.2 MNIST
data = 'MNIST'
exp = 2.2
gen_mnist_22 = run_generalization_exp(data, exp)
"""
accuracy on training set = 1.0
accuracy on test set = 1.0
accuracy on whole set = 1.0
IB_full I_Y at : 3.3198370254034137
IB_train I_Y at : 3.3198370254034137
IB_test I_Y at : 3.3194225261208263
"""

#%% Exp gen 3 MNIST
data = 'MNIST'
exp = 3
gen_mnist_3, gen_mnist_3_pruned = run_generalization_exp(data, exp)
"""
accuracy on training set = 1.0
accuracy on test set = 0.8855
accuracy on whole set = 0.9836428571428572
IB_full I_Y at : 3.3198370254034137
IB_train I_Y at : 3.3198709267551885
IB_test I_Y at : 3.3194225261208263
Fitting 5 folds for each of 20 candidates, totalling 100 fits
[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done 17 tasks | elapsed: 3.0min
[Parallel(n_jobs=4)]: Done 90 tasks | elapsed: 13.1min
[Parallel(n_jobs=4)]: Done 100 out of 100 | elapsed: 14.2min finished
0.8728666666666666 {'ccp_alpha': 0.00033035310424870437}
accuracy on training set = 0.9258166666666666
accuracy on test set = 0.8857
accuracy on whole set = 0.9200857142857143
IB_full I_Y at : 3.3198370254034137
IB_train I_Y at : 3.3198709267551885
IB_test I_Y at : 3.3194225261208263
"""

#%% Exp fit 1 NOISY
##############################################
# Toy fit to the data experiments
##############################################
data = '2D_noisy'
exp = 1
fit_noisy_1 = run_datafit_exp(data, exp)
"""
accuracy on training set = 1.0
accuracy on test set = 1.0
accuracy on whole set = 1.0
"""

#%% Exp fit 2 NOISY
data = '2D_noisy'
exp = 2
fit_noisy_2 = run_datafit_exp(data, exp)
"""
accuracy on training set = 0.9683817524209544
accuracy on test set = 0.9748075577326802
accuracy on whole set = 0.9693
"""

#%% Exp fit 1 MNIST
##############################################
# MNIST fit to the data experiments
##############################################
data = 'MNIST'
exp = 1
fit_mnist_1 = run_datafit_exp(data, exp)
"""
accuracy on training set = 1.0
accuracy on test set = 1.0
accuracy on whole set = 1.0
"""

#%% Exp fit 2 MNIST
data = 'MNIST'
exp = 2
fit_mnist_2 = run_datafit_exp(data, exp)
"""
accuracy on training set = 0.6828166666666666
accuracy on test set = 0.6877
accuracy on whole set = 0.6835142857142857
"""
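
#%% Minimal end-to-end sketch (illustrative only, not one of the experiments above):
# compute the information-plane coordinates of every tree layer on the deterministic
# 2D grid from generate_2D_data(). The grid size (k=4) and the depth cap of 3 are
# arbitrary choices for the demo.
X_sketch, Y_sketch = generate_2D_data(4)
clf_sketch = sklearn.tree.DecisionTreeClassifier(criterion="entropy", max_depth=3,
                                                 random_state=42)
clf_sketch.fit(X_sketch, Y_sketch)
df_nodes_sketch = compute_nodes_info(X_sketch, Y_sketch, clf_sketch)
layers_sketch = get_layers(X_sketch, Y_sketch, clf_sketch)
df_layers_sketch = compute_layers_info(df_nodes_sketch, layers_sketch,
                                       X_sketch, Y_sketch, clf_sketch)
print(df_layers_sketch[['I_TX', 'I_YT', 'Del_G', 'Del_C']])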