new plotting options to discriminate features based on the kmeans clusters

ab5192ae · NADAL Morgane · 904906c0 · ab5192ae
Commit ab5192ae authored 4 years ago by NADAL Morgane
--- a/features_analysis.py
+++ b/features_analysis.py
@@ -406,6 +406,47 @@ def KmeansOnDF(df: pd_.DataFrame(),
                        pl_.savefig(f"{save_in}\\feat_boxplot_{column}_{comb}_k={nb_cluster}.png")
                        pl_.close()

+                # Repeat the plot, this time grouping the rows by "duration" (one subplot per duration group)
+                cond_col_df_dur = pd_.concat((cond_col_df, df["duration"]), axis=1)
+                groupby_dur_ = cond_col_df_dur.groupby("duration")
+
+                fig = pl_.figure(constrained_layout=False, figsize=(15, 10))
+                gs = fig.add_gridspec(ncols=3, nrows=1)
+                ax1 = fig.add_subplot(gs[0, 0])
+                ax2 = fig.add_subplot(gs[0, 1])
+                ax3 = fig.add_subplot(gs[0, 2])
+                ax = [ax1, ax2, ax3]
+
+                # Plot a kernel density estimate per (condition, label) pair; no histogram is drawn (hist=False)
+                # print(f"Plot a histogram and kernel density estimate for feature {column}")
+                x = 0
+                for dur_, val in groupby_dur_:
+                    for comb, color in zip(itertools.product(conditions, labels), sb_.color_palette("deep", n_colors=len(labels)*len(conditions))):
+                        to_plot = val.loc[(val['condition'] == comb[0])]
+                        to_plot = to_plot.loc[(to_plot['label'] == comb[1])]
+
+                        # Kernel estimate of the histogram
+                        sb_.distplot(to_plot[[column]], hist=False, color=color, ax=ax[x])
+
+                    lines = [Line2D([0], [0], color=c, linewidth=3, linestyle='-') for c in sb_.color_palette("deep", n_colors=len(labels)*len(conditions))]
+                    lb = list(itertools.product(conditions, labels))
+                    ax[x].legend(lines, lb)
+                    ax[x].set_title(f'{dur_}', fontsize=11)
+                    # ax[x].set_xlim(min(val[column]), max(val[column]))
+                    # ax[x].set_ylim()
+                    ax[x].set_xlabel("Distribution kernel estimate")
+                    ax[x].set_ylabel("Features values")
+                    x += 1
+
+                fig.suptitle(f'{column} distribution{title}')
+
+                # pl_.tight_layout()
+
+                if save_in is not None:
+                    pl_.savefig(f"{save_in}\\feat_kernel_estimate_{column}_k={nb_cluster}_dur.png")
+                    pl_.close()
+
+
        # Representation on the image
        if rep_on_image:
            RepresentationOnImages(labeled_somas, kmeans, nb_cluster)
@@ -814,7 +855,7 @@ if __name__ == "__main__":
    # -- PCA with all the features
    print("\nSELECTED FEATURES\n")
    # Between the two conditions, regardless the duration of experiment (2 conditions, all durations)
-    PCAOnDF(df,
+    PCAOnDF(selected_df,
            target="condition",
            targets=["CHO", "DIO"],
            colors=["b", "r"],
@@ -824,7 +865,7 @@ if __name__ == "__main__":
            )

    # # Between the two conditions, for each duration (2 conditions, 3 durations)
-    # groupby_duration = df.groupby("duration")
+    # groupby_duration = selected_df.groupby("duration")
    # for duration, values in groupby_duration:
    #     # duration: str, values: pd_.DataFrame()
    #     PCAOnDF(values,
@@ -841,7 +882,7 @@ if __name__ == "__main__":
    # -- K-means with all the features (2 conditions)

    # Between the two conditions, regardless the duration of experiment (2 conditions, all durations)
-    kmeans = KmeansOnDF(df,
+    kmeans = KmeansOnDF(selected_df,
                        target="condition",
                        nb_clusters=(2, 3, 4, 5),
                        elbow=False,
@@ -853,7 +894,7 @@ if __name__ == "__main__":
                        )

    # # Between the two conditions, for each duration (2 conditions, 3 durations)
-    # groupby_duration = df.groupby("duration")
+    # groupby_duration = selected_df.groupby("duration")
    # for duration, values in groupby_duration:
    #     kmeans = KmeansOnDF(values,
    #                         target="condition",