# (Stray "Newer" / "Older" navigation labels from a web repository viewer; not part of the module.)
# Copyright CNRS/Inria/UNS
# Contributor(s): Eric Debreuve (since 2019), Morgane Nadal (2020)
#
# eric.debreuve@cnrs.fr
#
# This software is governed by the CeCILL license under French law and
# abiding by the rules of distribution of free software. You can use,
# modify and/ or redistribute the software under the terms of the CeCILL
# license as circulated by CEA, CNRS and INRIA at the following URL
# "http://www.cecill.info".
#
# As a counterpart to the access to the source code and rights to copy,
# modify and redistribute granted by the license, users are provided only
# with a limited warranty and the software's author, the holder of the
# economic rights, and the successive licensors have only limited
# liability.
#
# In this respect, the user's attention is drawn to the risks associated
# with loading, using, modifying and/or developing or reproducing the
# software by the user in light of its specific status of free software,
# that may mean that it is complicated to manipulate, and that also
# therefore means that it is reserved for developers and experienced
# professionals having in-depth computer knowledge. Users are therefore
# encouraged to load and test the software's suitability as regards their
# requirements in conditions enabling the security of their systems and/or
# data to be ensured and, more generally, to use and operate it in the
# same conditions as regards security.
#
# The fact that you are presently reading this means that you have had
# knowledge of the CeCILL license and that you accept its terms.
import pandas as pd_
import numpy as np_
import matplotlib.pyplot as pl_
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler
# (Stray commit-metadata text "NADAL Morgane" / "committed" from a web repository viewer; not part of the module.)
import os
import glob
import pandas as pd
# (Stray commit-metadata text "NADAL Morgane" / "committed" from a web repository viewer; not part of the module.)
def KMeansIntraImage(df, nb_clusters: tuple, representation=False, labeled_somas=None, max_elbow_clusters: int = 23):
    """Cluster the rows of a feature table with k-means and report intracluster variances.

    The function:
    1. standardizes the features,
    2. plots the elbow curve (within-cluster sum of squares against the number of
       clusters) and blocks until the figure is closed,
    3. for each requested number of clusters, fits k-means on the standardized
       features and prints the variance of each cluster, computed on the
       original (non-standardized) features,
    4. optionally displays the clusters on the labeled image.

    Parameters
    ----------
    df
        Feature table with one row per soma and one numeric column per feature.
    nb_clusters
        Numbers of clusters to try, e.g. ``(2, 3)``.
    representation
        If True, call ``RepresentationOnImages`` for each number of clusters.
    labeled_somas
        Labeled image passed to ``RepresentationOnImages``; required when
        ``representation`` is True.
    max_elbow_clusters
        Largest number of clusters tried for the elbow curve. It is clamped to
        the number of rows, since k-means cannot build more clusters than samples.

    Raises
    ------
    ValueError
        If ``representation`` is True and ``labeled_somas`` is None.
    """
    if representation and labeled_somas is None:
        raise ValueError("labeled_somas is required when representation is True.")

    # Data standardization
    stand_df = StandardScaler().fit_transform(df)

    # Original features as a plain array, so that rows can be selected by position.
    # Selecting by position (rather than by a "soma <n>" index label) keeps the rows
    # aligned with kmeans.labels_ whatever the index of df contains.
    features = np_.asarray(df, dtype=float)

    # Best number of clusters using Elbow method
    elbow_range = range(1, min(max_elbow_clusters, stand_df.shape[0]) + 1)
    wcss = []  # within-cluster sum of squares, one value per tried number of clusters
    for nb_cluster in elbow_range:
        kmeans = KMeans(n_clusters=nb_cluster, init='k-means++', max_iter=300, n_init=10, random_state=0)
        kmeans.fit(stand_df)
        wcss.append(kmeans.inertia_)
    pl_.plot(elbow_range, wcss)
    pl_.plot(elbow_range, wcss, 'bo')
    pl_.title('Elbow Method')
    pl_.xlabel('Number of clusters')
    pl_.ylabel('WCSS')
    pl_.show(block=True)
    pl_.close()

    # Kmeans with x clusters
    for nb_cluster in nb_clusters:
        kmeans = KMeans(n_clusters=nb_cluster, init='k-means++', max_iter=300, n_init=10, random_state=0)
        kmeans.fit(stand_df)

        # Intracluster variance: mean squared Euclidean distance to the cluster mean
        var = []
        for cluster in range(nb_cluster):
            members = features[kmeans.labels_ == cluster]
            if len(members) == 0:
                # Defensive: avoid a division by zero should a cluster be empty.
                var.append(float("nan"))
                continue
            deviations = members - members.mean(axis=0)
            var.append(np_.sum(deviations ** 2) / len(members))
        print(f"Intracluster variance for {nb_cluster} clusters :", var)

        # Representation on the image
        if representation:
            RepresentationOnImages(labeled_somas, kmeans, nb_cluster)
def RepresentationOnImages(labeled_somas, kmeans, nb_cluster):
    """Display the labeled somas colored by their k-means cluster.

    The labeled image is reduced with a maximum along axis 0, then every pixel
    whose value is ``i + 1`` is replaced by ``kmeans.labels_[i] + 1``. Pixels
    whose value matches no entry of ``kmeans.labels_`` (e.g. 0) are left
    unchanged. The figure is shown and the call blocks until it is closed.

    Parameters
    ----------
    labeled_somas
        Labeled image. NOTE(review): presumably a 3-D integer label volume in
        which soma ``i + 1`` corresponds to row ``i`` of the clustered feature
        table -- confirm against the caller.
    kmeans
        Fitted estimator exposing ``labels_``.
    nb_cluster
        Number of clusters, used only in the figure title.
    """
    projection = np_.amax(labeled_somas, axis=0)  # new array: labeled_somas is not modified
    clustered_somas = projection.copy()

    # Masks are computed on the untouched projection. Comparing against the array
    # being rewritten would match pixels already set to a cluster id as if they
    # were a later soma label, and relabel them a second time.
    for indx, value in enumerate(kmeans.labels_):
        clustered_somas[projection == indx + 1] = value + 1

    pl_.imshow(clustered_somas, cmap="tab20")
    pl_.title(f"n cluster = {nb_cluster}")
    pl_.show(block=True)
    pl_.close()
def FeatureDistribution(df, output_folder="D:\\MorganeNadal\\M2 report\\kmeans24"):
    """Save a 20-bin histogram of every column of ``df`` as a PNG file.

    For each column, its name is printed and the histogram is written to
    ``<output_folder>/feat_distrib_<column>.png``.

    Parameters
    ----------
    df
        Table whose columns are plotted one by one.
    output_folder
        Folder receiving the PNG files; it is created if it does not exist.
        The default is the folder that used to be hard-coded here.
    """
    os.makedirs(output_folder, exist_ok=True)

    for column in df.columns:
        print(column)
        df[column].hist(bins=20)
        pl_.title(f"{column}")
        pl_.savefig(os.path.join(output_folder, f"feat_distrib_{column}.png"))
        # Close the figure so that the next histogram is not drawn over this one.
        pl_.close()
if __name__ == "__main__":
    # Raw string: the backslashes of this Windows path must not be read as escape
    # sequences ("\M", "\R" and "\F" are invalid ones).
    features_folder = r"D:\MorganeNadal\Results\Features"
    os.chdir(features_folder)

    # Sorted so that the row order of the concatenated table does not depend on
    # the order in which the file system lists the files.
    all_filenames = sorted(glob.glob("*.csv"))
    print(all_filenames)
    if not all_filenames:
        raise FileNotFoundError(f"No CSV feature file found in {features_folder}")

    # One feature table per CSV file, stacked into a single table
    df = pd_.concat([pd_.read_csv(f, index_col=0) for f in all_filenames])

    # Alternative inputs/outputs kept for reference:
    # df.to_csv("D:\\MorganeNadal\\Results\\combined_features.csv")
    # labeled_somas = np_.load("D:\\MorganeNadal\\Results\\labeled_somas.npy")
    # df = pd_.read_csv("D:\\MorganeNadal\\M2 report\\Results\\features_all_images_DIO_CHO_.csv", index_col=0)

    # These columns are removed before clustering.
    # NOTE(review): presumably because they hold non-scalar values -- confirm.
    df = df.drop(
        [
            "spherical_angles_eva",
            "spherical_angles_evb",
            "hist_lengths",
            "hist_lengths_P",
            "hist_lengths_S",
            "hist_curvature",
            "hist_curvature_P",
            "hist_curvature_S",
        ],
        axis=1,
    )

    KMeansIntraImage(df, nb_clusters=(2,))