# Copyright CNRS/Inria/UNS
# Contributor(s): Eric Debreuve (since 2019), Morgane Nadal (2020)
#
# eric.debreuve@cnrs.fr
#
# This software is governed by the CeCILL license under French law and
# abiding by the rules of distribution of free software. You can use,
# modify and/ or redistribute the software under the terms of the CeCILL
# license as circulated by CEA, CNRS and INRIA at the following URL
# "http://www.cecill.info".
#
# As a counterpart to the access to the source code and rights to copy,
# modify and redistribute granted by the license, users are provided only
# with a limited warranty and the software's author, the holder of the
# economic rights, and the successive licensors have only limited
# liability.
#
# In this respect, the user's attention is drawn to the risks associated
# with loading, using, modifying and/or developing or reproducing the
# software by the user in light of its specific status of free software,
# that may mean that it is complicated to manipulate, and that also
# therefore means that it is reserved for developers and experienced
# professionals having in-depth computer knowledge. Users are therefore
# encouraged to load and test the software's suitability as regards their
# requirements in conditions enabling the security of their systems and/or
# data to be ensured and, more generally, to use and operate it in the
# same conditions as regards security.
#
# The fact that you are presently reading this means that you have had
# knowledge of the CeCILL license and that you accept its terms.
import glob
import os

import matplotlib.pyplot as pl_
import numpy as np_
import pandas as pd
import pandas as pd_
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler


def KMeansIntraImage(
    df,
    nb_clusters: tuple,
    representation=False,
    labeled_somas=None,
    max_elbow_clusters=23,
):
    """Cluster the rows of a feature table with k-means and report the spread.

    The features are standardized, an elbow curve (WCSS against the number of
    clusters) is displayed, then one k-means model is fitted per requested
    number of clusters. For each model the intracluster variance of every
    cluster is computed on the original (non-standardized) features and
    printed.

    Args:
        df: Numeric feature table; each row is one sample to cluster.
        nb_clusters: Numbers of clusters to fit, one k-means run per entry.
        representation: If true, display the clusters on the soma image by
            calling RepresentationOnImages for each entry of nb_clusters.
        labeled_somas: Labeled soma array passed to RepresentationOnImages;
            required when representation is true.
        max_elbow_clusters: Largest number of clusters tried for the elbow
            curve. It is capped by the number of rows of df, since k-means
            cannot fit more clusters than samples.

    Returns:
        A dict mapping each entry of nb_clusters to the list of intracluster
        variances (one float per cluster, in cluster-label order).

    Raises:
        ValueError: If representation is true but labeled_somas is None.
    """
    # Fail before the (slow) fitting rather than after it.
    if representation and labeled_somas is None:
        raise ValueError("labeled_somas is required when representation is True.")

    # Data standardization
    scaler = StandardScaler()
    scaler.fit(df)
    stand_df = scaler.transform(df)

    # Best number of clusters using Elbow method
    _PlotElbowCurve(stand_df, min(max_elbow_clusters, stand_df.shape[0]))

    # Rows are addressed by position below: k-means labels are positional, so
    # this stays correct whatever the index of df looks like (e.g. duplicated
    # "soma i" labels after concatenating several feature files).
    features = df.to_numpy(dtype=float)

    variances_per_nb_cluster = {}
    # Kmeans with x clusters
    for nb_cluster in nb_clusters:
        kmeans = KMeans(n_clusters=nb_cluster, init='k-means++', max_iter=300, n_init=10, random_state=0)
        kmeans.fit(stand_df)

        # Intracluster variance
        var = _IntraclusterVariances(features, kmeans.labels_, nb_cluster)
        variances_per_nb_cluster[nb_cluster] = var
        print(f"Intracluster variance for {nb_cluster} clusters :", var)

        # Representation on the image
        if representation:
            RepresentationOnImages(labeled_somas, kmeans, nb_cluster)

    return variances_per_nb_cluster


def _PlotElbowCurve(stand_df, max_nb_clusters):
    """Display the within-cluster sum of squares for 1..max_nb_clusters clusters."""
    tested_nb_clusters = range(1, max_nb_clusters + 1)

    wcss = []  # within cluster sum of errors(wcss)
    for nb_cluster in tested_nb_clusters:
        kmeans = KMeans(n_clusters=nb_cluster, init='k-means++', max_iter=300, n_init=10, random_state=0)
        kmeans.fit(stand_df)
        wcss.append(kmeans.inertia_)

    pl_.plot(tested_nb_clusters, wcss)
    pl_.plot(tested_nb_clusters, wcss, 'bo')
    pl_.title('Elbow Method')
    pl_.xlabel('Number of clusters')
    pl_.ylabel('WCSS')
    pl_.show(block=True)
    pl_.close()


def _IntraclusterVariances(features, labels, nb_cluster):
    """Return, per cluster, the mean squared distance of its rows to their mean."""
    labels = np_.asarray(labels)

    variances = []
    for cluster in range(nb_cluster):
        members = features[labels == cluster]
        if members.shape[0] == 0:
            # No row carries this label: the variance is undefined.
            variances.append(float("nan"))
            continue
        centered = members - members.mean(axis=0)
        variances.append(float((centered ** 2).sum() / members.shape[0]))

    return variances


def RepresentationOnImages(labeled_somas, kmeans, nb_cluster):
    """Display the soma image with each soma painted with its cluster number.

    The labeled array is maximum-projected along axis 0. Pixels whose value is
    i + 1 are painted with kmeans.labels_[i] + 1; all other pixels keep their
    projected value.

    Args:
        labeled_somas: Labeled soma array; projected along its first axis.
        kmeans: Fitted clustering object exposing labels_, where labels_[i]
            is the cluster of the soma labeled i + 1.
        nb_cluster: Number of clusters, only used in the figure title.

    Returns:
        The projected image with soma labels replaced by cluster numbers.
    """
    projection = np_.amax(labeled_somas, axis=0)
    clustered_somas = projection.copy()

    # Masks are computed on the untouched projection: comparing against the
    # array being rewritten would repaint a soma whose new cluster number
    # equals the label of a soma processed later.
    for indx, value in enumerate(kmeans.labels_):
        clustered_somas[projection == indx + 1] = value + 1

    pl_.imshow(clustered_somas, cmap="tab20")
    pl_.title(f"n cluster = {nb_cluster}")
    pl_.show(block=True)
    pl_.close()

    return clustered_somas


def FeatureDistribution(df, output_folder=None):
    """Save a 20-bin histogram of every column of df as a PNG file.

    Args:
        df: Feature table; one figure named feat_distrib_<column>.png is
            written per column.
        output_folder: Folder receiving the figures. When None, the original
            hard-coded folder is used.
    """
    for column in df.columns:
        print(column)
        df[column].hist(bins=20)
        pl_.title(f"{column}")
        if output_folder is None:
            figure_path = f"D:\\MorganeNadal\\M2 report\\kmeans24\\feat_distrib_{column}.png"
        else:
            figure_path = os.path.join(output_folder, f"feat_distrib_{column}.png")
        pl_.savefig(figure_path)
        pl_.close()


if __name__ == "__main__":
    # Raw string: "\M", "\R" and "\F" are not valid escape sequences.
    os.chdir(r"D:\MorganeNadal\Results\Features")

    # Sorted so that the row order of the concatenated table is reproducible.
    all_filenames = sorted(glob.glob("*.csv"))
    print(all_filenames)

    df = pd_.concat([pd_.read_csv(f, index_col=0) for f in all_filenames])
    # df.to_csv("D:\MorganeNadal\Results\combined_features.csv")

    # labeled_somas = np_.load("D:\\MorganeNadal\\Results\\labeled_somas.npy")
    # df = pd_.read_csv("D:\\MorganeNadal\\M2 report\\Results\\features_all_images_DIO_CHO_.csv", index_col=0)

    # These columns are removed before clustering.
    df = df.drop(
        columns=[
            "spherical_angles_eva",
            "spherical_angles_evb",
            "hist_lengths",
            "hist_lengths_P",
            "hist_lengths_S",
            "hist_curvature",
            "hist_curvature_P",
            "hist_curvature_S",
        ]
    )

    KMeansIntraImage(df, nb_clusters=(2,))
    # FeatureDistribution(df)