From 74014f2709f52ed30bfd0b24830cc821097c8c61 Mon Sep 17 00:00:00 2001
From: KOVAC Grgur <grgur.kovac@inria.fr>
Date: Fri, 23 Feb 2024 19:07:33 +0100
Subject: [PATCH] Many changes -> Global AI Cultures Workshop state

---
 README.md                                     |  89 +++-
 campaign_evaluations.py                       | 295 ++++++++++--
 evaluate.py                                   |   2 +
 neurips_correlations.py                       |  99 -----
 neurips_correlations.sh                       | 418 ------------------
 neurips_evaluations.sh                        | 331 --------------
 parse_global_opinions_dataset.py              |  41 ++
 parse_res_json.py                             |   3 -
 .../tolkien_characters_races.txt              |  60 +++
 price_estimate.py                             |  89 ----
 run_campaign_sim_conv_no_pop.sh               |  34 +-
 run_campaign_sim_conv_pvq_msgs.sh             |  26 +-
 run_campaign_sim_conv_pvq_seeds.sh            |  56 +--
 run_campaign_sim_conv_pvq_seeds_NO_SYSTEM.sh  |  38 +-
 run_dummy.sh                                  |  39 +-
 run_plosone_pvq.sh                            | 179 --------
 run_single.sh                                 | 268 +++++++++++
 vis_pop_values.py                             | 281 ++++++++++++
 visualization_scripts/plt_vis_tomi.py         | 105 -----
 .../scatter_plot_tomi_old.py                  | 244 ----------
 visualization_scripts/wvs_analysis.py         | 353 +++++++++++++++
 21 files changed, 1437 insertions(+), 1613 deletions(-)
 delete mode 100644 neurips_correlations.py
 delete mode 100644 neurips_correlations.sh
 delete mode 100644 neurips_evaluations.sh
 create mode 100644 parse_global_opinions_dataset.py
 create mode 100644 personas/tolkien_characters/tolkien_characters_races.txt
 delete mode 100644 price_estimate.py
 delete mode 100644 run_plosone_pvq.sh
 create mode 100644 run_single.sh
 create mode 100644 vis_pop_values.py
 delete mode 100644 visualization_scripts/plt_vis_tomi.py
 delete mode 100644 visualization_scripts/scatter_plot_tomi_old.py
 create mode 100644 visualization_scripts/wvs_analysis.py

diff --git a/README.md b/README.md
index 586b96a..40a3f1e 100644
--- a/README.md
+++ b/README.md
@@ -6,8 +6,9 @@ This codebase is based on MMLU codebase. - link
 
 Setup the conda env
 ```
-conda create -n llm_persp python=3.9
+conda create -n llm_stability python=3.9
-conda activate llm_persp
+conda activate llm_stability
+cd test/
 pip install -r requirements.txt 
 
 # install transformers
@@ -17,34 +18,90 @@ conda install cudatoolkit -y
 ```
 
 
-[//]: # (or)
+# Evaluating a model
 
-[//]: # (```)
+The ``run_single.sh`` script contains an example of how to evaluate a model.
 
-[//]: # (git clone https://github.com/huggingface/transformers.git)
+It requires setting 7 parameters, which are by default set to:
+```
+1. Theme: grammar
+2. Seed: 1
+3. N messages: 3
+4. LLM: dummy
+5. Questionnaire: pvq
+6. Population: tolkien_characters
+7. Experiment name: test
+```
+
+You can modify those parameters inside the script (following the comments).
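+
+As a rough illustration, these parameters correspond to shell variables near the top of the script. A hypothetical sketch (the variable names below are illustrative; follow the comments in ``run_single.sh`` for the actual ones):
+```
+# hypothetical parameter block (names are illustrative, not the script's actual ones)
+theme="grammar"                   # 1. Theme
+seed=1                            # 2. Seed
+n_messages=3                      # 3. N messages
+model="dummy"                     # 4. LLM
+questionnaire="pvq"               # 5. Questionnaire
+population="tolkien_characters"   # 6. Population
+experiment_name="test"            # 7. Experiment name
+```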
+
+
+From the test directory, run
+```
+bash run_single.sh
+```
 
-[//]: # (cd transformers)
+This will evaluate a dummy model, which chooses random answers on the PVQ questionnaire.
 
-[//]: # (git checkout d04ec99bec8a0b432fc03ed60cea9a1a20ebaf3c)
+# Running all experiments
 
-[//]: # (pip install .)
 
-[//]: # (```)
+All the experiments in the paper are contained in the ```run_campaign*.sh``` scripts.
+
+These are slurm scripts that enable parallel evaluation of different themes and seeds. Each script requires one argument, which selects the model.
+The following command evaluates the Mistral-Instruct-v0.2 model:
+
+```
+sbatch run_campaign_sim_conv_pvq_seeds.sh 7
+```
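+
+The integer argument is used to select the model inside the script. A hypothetical sketch of that logic (the actual list and ordering are defined in the script itself):
+```
+# hypothetical model-selection logic; the real model list lives in the script
+models=("dummy" "llama_2_7b_chat" "llama_2_13b_chat" "llama_2_70b_chat" "zephyr" "Qwen" "Mixtral-8x7B-Instruct-v0.1" "Mistral-Instruct-v0.2")
+engine="${models[$1]}"  # e.g. the argument 7 would select Mistral-Instruct-v0.2
+```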
+
+These scripts also require setting the population and the questionnaire. Both can easily be changed by following the comments in the scripts.
+By default, they are set to fictional characters and PVQ:
+```
+## PVQ - tolkien characters
+test_tag="pvq"
+experiment_name="pvq_test"
+data_dir="data_pvq"
+population_type="tolkien_characters"
+```
 
 
-# Running experiments
+The scripts are used for various experiments as follows:
 
-Script run_dummy.sh shows an example of how to run a model.
+Experiments with simulated populations: ```run_campaign_sim_conv_pvq_seeds.sh``` 
 
-Scripts run_[neurips,iclr]_[pvq,hof,big5].sh contain the commands used to run our experiments.
+Experiments with simulated populations and increasing conversation length:
+```run_campaign_sim_conv_pvq_msgs.sh```
 
-# Evaluation
+Experiments with no persona instructions: ```run_campaign_sim_conv_no_pop.sh```
+
+Ablation study on the system message with LLaMa-2 models: ```run_campaign_sim_conv_pvq_seeds_NO_SYSTEM.sh```
+
+
+## Non-slurm machine
+
+The ```run_campaign*.sh``` scripts can be run on a regular machine by manually setting the ```SLURM_ARRAY_TASK_ID``` variable as follows:
+
+1. Check the slurm array size parameter
+
+```
+grep "#SBATCH --array=" run_campaign_sim_conv_pvq_seeds.sh
+```
+
+The expected output is:
+```
+#SBATCH --array=0-29 # themes x n_seeds -> 6x5
+```
+This means that slurm would run **30 parallel jobs** corresponding to 6 themes (5 + no theme) and 5 seeds.
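+
+Each task id presumably decomposes into a (theme, seed) pair along these lines (a hypothetical sketch; the actual indexing is defined in the script):
+```
+# hypothetical mapping of SLURM_ARRAY_TASK_ID to a (theme, seed) pair
+i=$SLURM_ARRAY_TASK_ID
+theme_idx=$(( i / 5 ))  # 6 values: 5 themes + no theme
+seed_idx=$(( i % 5 ))   # 5 seeds
+```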
+
+2. Run the jobs manually
+
+You can run the 30 evaluations sequentially on a regular machine as follows:
+```
+for i in {0..29}; do SLURM_ARRAY_TASK_ID=$i bash run_campaign_sim_conv_pvq_seeds.sh; done
+```
 
-The bar_viz.py script is used for visualization evaluation and statistical analysis.
-It can be used as such:
+or in parallel as follows:
 ```
-python visualization_scripts/bar_viz.py results_iclr/results_pvq_test_sim_conv_gpt-3.5-turbo-0301_perm_50_theme/*
+for i in {0..29}; do SLURM_ARRAY_TASK_ID=$i bash run_campaign_sim_conv_pvq_seeds.sh & done
 ```
-Scripts [neurips,iclr]_evaluations.sh contain command to evaluate and plot the results from our experiments.
 
 
diff --git a/campaign_evaluations.py b/campaign_evaluations.py
index 13dbefd..74232bf 100644
--- a/campaign_evaluations.py
+++ b/campaign_evaluations.py
@@ -12,15 +12,16 @@ import scipy.stats as st
 from termcolor import cprint
 
 def model_2_family(model):
-    if "llama_2" in model:
+    model_lower = model.lower()
+    if "llama_2" in model_lower:
         return "LLaMa-2"
-    elif "Mixtral" in model:
+    elif "mixtral" in model_lower:
         return "Mixtral"
-    elif "Mistral" in model or "zephyr" in model:
+    elif "mistral" in model_lower or "zephyr" in model_lower:
        return "Mistral"
-    elif "phi" in model:
+    elif "phi" in model_lower:
         return "Phi"
-    elif "Qwen" in model:
+    elif "qwen" in model_lower:
         return "Qwen"
-    elif "dummy":
+    elif "dummy" in model_lower:
         return "dummy"
@@ -53,10 +54,19 @@ def legend_without_duplicate_labels(ax, loc="best", title=None, legend_loc=None)
     # axs[plt_i].legend(bbox_to_anchor=legend_loc, loc="best")
     ax.legend(*zip(*unique), loc=loc, title=title, fontsize=legend_fontsize, title_fontsize=legend_fontsize, bbox_to_anchor=legend_loc)
 
+def get_all_ipsative_corrs_str(default_profile):
 
-def run_analysis(eval_script_path, data_dir, assert_n_contexts=None, insert_dummy_participants=False):
+    if default_profile is not None:
+        all_ipsative_corrs_str = "All_Ipsative_corrs_default_profile"
+    else:
+        all_ipsative_corrs_str = "All_Ipsative_corrs"
+
+    return all_ipsative_corrs_str
+
+
+def run_analysis(eval_script_path, data_dir, assert_n_contexts=None, insert_dummy_participants=False, default_profile=None, paired_data_dir=None):
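+    """Run the evaluation script on data_dir in a subprocess and return its JSON results as a dict."""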
     # run evaluation script
-    command = f"python {eval_script_path} --result-json-stdout {'--assert-n-dirs ' + str(assert_n_contexts) if assert_n_contexts else ''} {'--insert-dummy' if insert_dummy_participants else ''} {data_dir}/*/*"
+    command = f"python {eval_script_path} --result-json-stdout {'--assert-n-dirs ' + str(assert_n_contexts) if assert_n_contexts else ''} {'--insert-dummy' if insert_dummy_participants else ''} {f'--default-profile {default_profile}' if default_profile is not None else ''} {data_dir}/*/* {f'--paired-dirs {paired_data_dir}/*/*' if paired_data_dir is not None else ''}"
     process = subprocess.Popen(command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
     stdout, stderr = process.communicate()
 
@@ -64,7 +74,9 @@ def run_analysis(eval_script_path, data_dir, assert_n_contexts=None, insert_dumm
 
     # parse json outputs
     results = json.loads(stdout)
-    results["All_Ipsative_corrs"]=np.array(results["All_Ipsative_corrs"])
+
+    all_ipsative_corrs_str = get_all_ipsative_corrs_str(default_profile)
+    results[all_ipsative_corrs_str] = np.array(results[all_ipsative_corrs_str])
 
     return results
 
@@ -156,6 +168,8 @@ add_title = False
 
 bars_as_plot = False
 
+label_ = None
+
 experiment_dirs = [
     "sim_conv_pvq_permutations_msgs",
     # "sim_conv_pvq_tolkien_characters_seeds",
@@ -179,12 +193,18 @@ metric = "Ipsative"
 
 
 # figure_name = "tolk_ro_t"
+figure_name = "paired_tolk_ro_uni"
+# figure_name = "paired_tolk_ro_ben"
+# figure_name = "paired_tolk_ro_pow"
+# figure_name = "paired_tolk_ro_ach"
 # figure_name = "fam_ro_t"
-figure_name = "don_t"
+# figure_name = "don_t"
 # figure_name = "no_pop_ips"
 # figure_name = "tolk_ro_msgs"
 # figure_name = "tolk_ips_msgs"
 # figure_name = "no_pop_msgs"
+# figure_name = "tolk_ips_msgs_default_prof"
+# figure_name = "llama_sys_no_sys"
 
 rotatation_x_labels = 0
 
@@ -199,9 +219,22 @@ title_fontsize = 18
 interval_figsize_x = 8
 interval_figsize_y = 7
 
-show_human_change = False
+round_y_lab = 1
+
+show_human_change = True
 legend_loc = None
 
+legend_title = "LLM families"
+
+default_profile = None
+
+add_tolkien_ro_curve = False
+add_tolkien_ipsative_curve = False
+
+left_adjust = None
+paired_dir = None
+y_label = None
+
 if figure_name == "no_pop_msgs":
     experiment_dirs = ["sim_conv_pvq_permutations_msgs"]
     seed_strings = [f"{i}_msgs/_seed" for i in range(1, 10, 2)]  # msgs (show trends
@@ -210,6 +243,7 @@ if figure_name == "no_pop_msgs":
     bar_plots = False
     models = plot_models
     metric = "Ipsative"
+    human_change_xloc = -1.0
     msgs_ro_tolk = False
 
     min_y, max_y = -0.1, 1.0  # IPS
@@ -222,6 +256,39 @@ if figure_name == "no_pop_msgs":
     interval_figsize_x = 14
     interval_figsize_y = 7
 
+elif figure_name == "tolk_ips_msgs_default_prof":
+
+    # Messages on Ips Tolkien
+    models = ["1_msgs", "3_msgs", "5_msgs", "7_msgs", "9_msgs"]
+    experiment_dirs = ["sim_conv_pvq_tolkien_characters_msgs/Mixtral-8x7B-Instruct-v0.1"]
+    seed_strings = [f"{i}_seed" for i in range(1, 10, 2)]
+    y_label = "Stability"
+
+    bar_plots = True
+    bars_as_plot = True
+    add_tolkien_ro_curve = True
+    add_tolkien_ipsative_curve = False
+    msgs_ro_tolk = True
+    legend_title = None
+    legend_fontsize = 14
+
+    label_ = "Ipsative stability (with\n  the default profile)"
+
+    metric = "Ipsative_default_profile"
+    default_profile = "results/sim_conv_pvq_permutations_msgs/Mixtral-8x7B-Instruct-v0.1/9_msgs/_seed/results_sim_conv_permutations_Mixtral-8x7B-Instruct-v0.1/pvq_test_Mixtral-8x7B-Instruct-v0.1_data_pvq_pvq_auto__permutations_50_permute_options_5_no_profile_True_format_chat___2024_02_14_20_47_27"
+    add_legend = True
+    human_change_xloc = 6.8
+    show_human_change = False
+
+    min_y, max_y = 0.3, 0.8  # IPS
+    round_y_lab = 2
+
+    left_adjust = 0.15
+
+    interval_figsize_x = 6
+    interval_figsize_y = 6
+
+
 elif figure_name == "no_pop_ips":
     experiment_dirs = ["sim_conv_pvq_permutations_msgs"]
     seed_strings = ["3_msgs/_seed"]  # ips (only n=3)
@@ -230,6 +297,7 @@ elif figure_name == "no_pop_ips":
     bar_plots = True
     add_legend = True
     metric = "Ipsative"
+    human_change_xloc = -1.0
     msgs_ro_tolk = False
 
     show_human_change = True
@@ -262,6 +330,7 @@ elif figure_name == "tolk_ro_t":
     add_legend = True
     add_title = True
     metric = "Rank-Order"
+    human_change_xloc = 6.8
     msgs_ro_tolk = False
     show_human_change = True
     legend_fontsize = 22
@@ -272,6 +341,53 @@ elif figure_name == "tolk_ro_t":
 
     min_y, max_y = -0.1, 0.8  # RO
 
+elif figure_name.startswith("paired_tolk_ro"):
+
+    if figure_name.endswith("uni"):
+        value_to_pair = "Universalism"
+    elif figure_name.endswith("ben"):
+        value_to_pair = "Benevolence"
+    elif figure_name.endswith("pow"):
+        value_to_pair = "Power"
+    elif figure_name.endswith("ach"):
+        value_to_pair = "Achievement"
+    else:
+        raise ValueError(f"Undefined figure name: {figure_name}")
+
+    # y_label = f"Rank-Order stability\n{value_to_pair}-Donation"
+    y_label = f"Rank-Order stability\nwith donation"
+
+    experiment_dirs = ["sim_conv_pvq_tolkien_characters_seeds"]
+    paired_dir = "sim_conv_tolkien_donation_tolkien_characters_seeds"
+
+    seed_strings = [f"{i}_seed" for i in range(1, 10, 2)]
+
+    add_tolkien_ipsative_curve = False
+    bar_plots = True
+
+    if value_to_pair == "Universalism":
+        add_legend = True
+        legend_fontsize = 20
+    else:
+        add_legend = False
+
+    add_title = False
+    metric = "Rank-Order"
+    msgs_ro_tolk = False
+    show_human_change = False
+    human_change_xloc = 6.8
+    rotatation_x_labels = 90
+
+    xticks_fontsize = 15
+    yticks_fontsize = 18
+
+    left_adjust = 0.2
+
+    if value_to_pair in ["Power", "Achievement"]:
+        min_y, max_y = -0.5, 0.1
+    else:
+        min_y, max_y = -0.1, 0.5
+
 
 elif figure_name == "fam_ro_t":
 
@@ -284,6 +400,7 @@ elif figure_name == "fam_ro_t":
     add_legend = False
     add_title = True
     metric = "Rank-Order"
+    human_change_xloc = 6.8
     msgs_ro_tolk = False
 
     show_human_change = True
@@ -304,6 +421,7 @@ elif figure_name == "don_t":
     add_legend = False
     add_title = True
     metric = "Rank-Order"
+    human_change_xloc = 6.8
     msgs_ro_tolk = False
     rotatation_x_labels = 90
 
@@ -324,6 +442,7 @@ elif figure_name == "tolk_ro_msgs":
     msgs_ro_tolk = True
 
     metric = "Rank-Order"
+    human_change_xloc = 6.8
     interval_figsize_x = 14
     interval_figsize_y = 7
 
@@ -346,17 +465,66 @@ elif figure_name == "tolk_ips_msgs":
     msgs_ro_tolk = True
 
     metric = "Ipsative"
+    human_change_xloc = 6.8
 
     min_y, max_y = -0.1, 1  # IPS
+
+elif figure_name == "llama_sys_no_sys":
+
+    experiment_dirs = [
+        # "sim_conv_pvq_tolkien_characters_seeds",
+        # "sim_conv_pvq_tolkien_characters_seeds_NO_SYSTEM",
+        ""
+    ]
+
+    seed_strings = [f"{i}_seed" for i in range(1, 10, 2)]
+    models = [
+        "sim_conv_pvq_tolkien_characters_seeds/llama_2_7b_chat",
+        "sim_conv_pvq_tolkien_characters_seeds/llama_2_13b_chat",
+        "sim_conv_pvq_tolkien_characters_seeds/llama_2_70b_chat",  # 2 gpu
+        "sim_conv_pvq_tolkien_characters_seeds_NO_SYSTEM/llama_2_7b_chat",
+        "sim_conv_pvq_tolkien_characters_seeds_NO_SYSTEM/llama_2_13b_chat",
+        "sim_conv_pvq_tolkien_characters_seeds_NO_SYSTEM/llama_2_70b_chat",  # 2 gpu
+    ]
+    x_label_map = {
+        "sim_conv_pvq_tolkien_characters_seeds/llama_2_7b_chat": "llama_2_7b_chat_sys",
+        "sim_conv_pvq_tolkien_characters_seeds/llama_2_13b_chat": "llama_2_13b_chat_sys",
+        "sim_conv_pvq_tolkien_characters_seeds/llama_2_70b_chat": "llama_2_70b_chat_sys",  # 2 gpu
+        "sim_conv_pvq_tolkien_characters_seeds_NO_SYSTEM/llama_2_7b_chat": "llama_2_7b_chat_no_sys",
+        "sim_conv_pvq_tolkien_characters_seeds_NO_SYSTEM/llama_2_13b_chat": "llama_2_13b_chat_no_sys",
+        "sim_conv_pvq_tolkien_characters_seeds_NO_SYSTEM/llama_2_70b_chat": "llama_2_70b_chat_no_sys",  # 2 gpu
+    }
+
+    add_tolkien_ipsative_curve = False
+    bar_plots = True
+    add_legend = False
+    add_title = True
+    metric = "Rank-Order"
+    human_change_xloc = -0.5
+    msgs_ro_tolk = False
+    show_human_change = True
+    legend_fontsize = 22
+    rotatation_x_labels = 90
+    show_human_change = False
+
+    xticks_fontsize = 15
+    yticks_fontsize = 18
+
+    min_y, max_y = -0.1, 0.8  # RO
 else:
     raise ValueError(f"Unknown figure name {figure_name}.")
 
-
+if y_label is None:
+    y_label = metric + " stability (r)"
 
 if add_tolkien_ipsative_curve:
     with open("tolkien_ipsative_curve_cache.json", "r") as f:
         tolkien_ipsative_curve = json.load(f)
 
+if add_tolkien_ro_curve:
+    with open("tolkien_ro_curve_cache.json", "r") as f:
+        tolkien_ro_curve = json.load(f)
+
 confidence = 0.95
 
 stab_mmlu_scatter = False  # overrides other plots
@@ -402,6 +570,7 @@ for experiment_dir in experiment_dirs:
                 continue
 
             data_dir = os.path.join("results", experiment_dir, model, seed_str)
+            paired_data_dir = os.path.join("results", paired_dir, model, seed_str)
 
             if len(glob.glob(data_dir+"/*/*/*.json")) < 2:
                 print(f"No evaluation found at {data_dir}.")
@@ -413,7 +582,7 @@ for experiment_dir in experiment_dirs:
                 eval_script_path = "./visualization_scripts/data_analysis.py"
                 with open(eval_script_path, 'rb') as file_obj: eval_script = str(file_obj.read())
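+                # cache key: hash of the eval script source, run_analysis source, the results-dir checksum, and all analysis flags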
                 hash = hashlib.sha256("-".join(
-                    [eval_script, inspect.getsource(run_analysis), checksumdir.dirhash(data_dir), str(assert_n_contexts), str(insert_dummy_participants)]
+                    [eval_script, inspect.getsource(run_analysis), checksumdir.dirhash(data_dir), str(assert_n_contexts), str(insert_dummy_participants), str(default_profile), str(paired_data_dir)]
                 ).encode()).hexdigest()
                 cache_path = f".cache/{hash}.json"
 
@@ -425,7 +594,7 @@ for experiment_dir in experiment_dirs:
 
                 else:
                     print("\t\tEvaluating")
-                    eval_data = run_analysis(eval_script_path=eval_script_path, data_dir=data_dir, assert_n_contexts=assert_n_contexts, insert_dummy_participants=insert_dummy_participants)
+                    eval_data = run_analysis(eval_script_path=eval_script_path, data_dir=data_dir, assert_n_contexts=assert_n_contexts, insert_dummy_participants=insert_dummy_participants, default_profile=default_profile, paired_data_dir=paired_data_dir)
 
                 with open(cache_path, 'w') as fp:
 
@@ -439,7 +608,8 @@ for experiment_dir in experiment_dirs:
 
             data[experiment_dir][model][seed_str] = eval_data.copy()
 
-            metrs_str = {k: np.round(v, 2) for k,v in data[experiment_dir][model][seed_str].items() if k != "All_Ipsative_corrs"}
+            keys_to_print = ["Mean-Level", "Rank-Order", "Ipsative"]
+            metrs_str = {k: np.round(v, 2) for k, v in data[experiment_dir][model][seed_str].items() if k in keys_to_print}
             print(f"\t\t- {seed_str} : {metrs_str}")
 
 
@@ -487,21 +657,23 @@ if num_cols == 1:
 else:
     axs = axs.flatten()
 
+all_ipsative_corrs_str = get_all_ipsative_corrs_str(default_profile)
+
 for plt_i, experiment_dir in enumerate(experiment_dirs):
 
     if show_human_change:
 
-        if metric == "Rank-Order":
-            xloc = 6.8
+        if default_profile:
+            metric_human = "Ipsative"
         else:
-            xloc = -1.0
+            metric_human = metric
 
-        axs[plt_i].axhline(y=human_change_10_12[metric], color=human_data_color, linestyle=':', zorder=0)
-        axs[plt_i].text(xloc, human_change_10_12[metric] + 0.01, "Human value stability between ages 10 and 12",
+        axs[plt_i].axhline(y=human_change_10_12[metric_human], color=human_data_color, linestyle=':', zorder=0)
+        axs[plt_i].text(human_change_xloc, human_change_10_12[metric_human] + 0.01, "Human value stability between ages 10 and 12",
                         fontsize=human_data_fontsize, color=human_data_color)
 
-        axs[plt_i].axhline(y=human_change_20_28[metric], color=human_data_color, linestyle=':', zorder=0)
-        axs[plt_i].text(xloc, human_change_20_28[metric] + 0.01, "Human value stability between ages 20 and 28",
+        axs[plt_i].axhline(y=human_change_20_28[metric_human], color=human_data_color, linestyle=':', zorder=0)
+        axs[plt_i].text(human_change_xloc, human_change_20_28[metric_human] + 0.01, "Human value stability between ages 20 and 28",
                         fontsize=human_data_fontsize, color=human_data_color)
 
         # axs[plt_i].scatter(models, [models_mmlu[m] for m in models], marker="x", s=5)
@@ -513,7 +685,7 @@ for plt_i, experiment_dir in enumerate(experiment_dirs):
         ).mean(axis=1)
         mmlus = [models_mmlu[m] for m in models_to_scatter]
 
-        axs[plt_i].set_ylabel(f"{metric}", fontsize=y_label_fontsize)
+        axs[plt_i].set_ylabel(y_label, fontsize=y_label_fontsize)
         axs[plt_i].set_xlabel("MMLU score", fontsize=20)
         for m,s, mod in zip(mmlus, stabs, models_to_scatter):
             axs[plt_i].text(m+0.005, s+0.005, mod, fontsize=8)
@@ -545,17 +717,23 @@ for plt_i, experiment_dir in enumerate(experiment_dirs):
 
     elif bar_plots:
 
-        plt.subplots_adjust(top=0.90, bottom=0.5, hspace=0.8)
+        plt.subplots_adjust(left=left_adjust, top=0.90, bottom=0.5, hspace=0.8)
         xs = models
-        scores = np.array([[data[experiment_dir][model][seed_str][metric] for seed_str in seed_strings] for model in models])
+        xs = [x_label_map.get(x, x) for x in xs]
+
+        if figure_name.startswith("paired_tolk_ro"):
+            scores = np.array([[data[experiment_dir][model][seed_str]['Proxy_stability'][value_to_pair] for seed_str in seed_strings] for model in models])
+        else:
+            scores = np.array([[data[experiment_dir][model][seed_str][metric] for seed_str in seed_strings] for model in models])
+
         ys = scores.mean(axis=1)
 
         # get the right side of the CI
         if "sim_conv_pvq_permutations_msgs" in experiment_dir:
-            assert len(seed_strings) == 1 # you should use plots, not bars
+            assert len(seed_strings) == 1  # you should use plots, not bars
             # [n_modelx, cont_pairs, pop_size]
-            all_corrs = np.array([data[experiment_dir][model][seed_strings[0]]["All_Ipsative_corrs"] for model in models])
-            all_corrs = all_corrs.mean(1) # mean over pairs
+            all_corrs = np.array([data[experiment_dir][model][seed_strings[0]][all_ipsative_corrs_str] for model in models])
+            all_corrs = all_corrs.mean(1)  # mean over pairs
 
             c2 = np.array([st.t.interval(confidence, len(a) - 1, loc=np.mean(a), scale=st.sem(a))[1] for a in all_corrs])
             tick_len = c2 - ys  # half the conf interval
@@ -564,8 +742,9 @@ for plt_i, experiment_dir in enumerate(experiment_dirs):
             n_msgs = models
             # [n_msgs, n_seeds, n_pairs, n_personas]
             all_corrs = np.array([[
-                data[experiment_dir][n_msg][seed_str]["All_Ipsative_corrs"] for seed_str in seed_strings
+                data[experiment_dir][n_msg][seed_str][all_ipsative_corrs_str] for seed_str in seed_strings
             ] for n_msg in n_msgs])
+
             all_corrs = all_corrs.mean(2)  # mean over context pairs
 
             # SI over what -> seed and personas
@@ -577,7 +756,6 @@ for plt_i, experiment_dir in enumerate(experiment_dirs):
             c2 = np.array([st.t.interval(confidence, len(a) - 1, loc=np.mean(a), scale=st.sem(a))[1] for a in all_corrs])
             tick_len = c2 - ys  # half the conf interval
 
-
         else:
             c2 = np.array([st.t.interval(confidence, len(a) - 1, loc=np.mean(a), scale=st.sem(a))[1] for a in scores])
             tick_len = c2 - ys  # half the conf interval
@@ -587,17 +765,46 @@ for plt_i, experiment_dir in enumerate(experiment_dirs):
 
         if bars_as_plot:
             # used for msgs
-            axs[plt_i].plot(xs, ys)
+            axs[plt_i].plot(xs, ys, label=label_)
             axs[plt_i].fill_between(xs, ys - tick_len, ys + tick_len, alpha=0.3)
 
-            if metric == "Ipsative":
+            if default_profile is not None and figure_name == "tolk_ips_msgs_default_prof":
+
+                # load ro
+                if add_tolkien_ro_curve:
+                    cprint("Loading Rank-order Tolkien Mixtral-Instruct stability from CACHE", "red")
+
+                    xs = np.array(tolkien_ro_curve["xs"])
+                    xs = [x_label_map.get(x, x) for x in xs]
+                    ys = np.array(tolkien_ro_curve["ys"])
+                    shade_len = np.array(tolkien_ro_curve["tick_len"])
+
+                    lab_ = "Rank-Order stability\n  (between contexts)"
+                    col_ = "black"
+                    axs[plt_i].plot(xs, ys, label=lab_, color=col_)
+                    axs[plt_i].fill_between(xs, ys - shade_len, ys + shade_len, alpha=0.3, color=col_)
+
+                if add_tolkien_ipsative_curve:
+                    cprint("Loading Ipsative Tolkien Mixtral-Instruct stability from CACHE", "red")
+
+                    xs = np.array(tolkien_ipsative_curve["xs"])
+                    xs = [x_label_map.get(x, x) for x in xs]
+                    ys = np.array(tolkien_ipsative_curve["ys"])
+                    shade_len = np.array(tolkien_ipsative_curve["tick_len"])
+
+                    lab_ = "Ipsative stability (between contexts)"
+                    col_ = "brown"
+                    axs[plt_i].plot(xs, ys, label=lab_, color=col_, zorder=0)
+                    axs[plt_i].fill_between(xs, ys - shade_len, ys + shade_len, alpha=0.3, color=col_, zorder=0)
+
+            if metric == "Ipsative" and figure_name == "tolk_ips_msgs":
                 tolkien_ipsative_curve = {
                     "xs": list(xs),
                     "ys": list(ys),
                     "tick_len": list(tick_len),
                 }
 
-                cprint("SAVING Ipsative Rank-order Tolkien Mixtral-Instruct stability to CACHE", "red")
+                cprint("SAVING Ipsative Tolkien Mixtral-Instruct stability to CACHE", "red")
                 with open("tolkien_ipsative_curve_cache.json", "w") as f:
                     json.dump(tolkien_ipsative_curve, f)
 
@@ -605,20 +812,30 @@ for plt_i, experiment_dir in enumerate(experiment_dirs):
             if msgs_ro_tolk:
                 axs[plt_i].bar(xs, ys, yerr=tick_len)
 
+                if metric == "Rank-Order" and figure_name == "tolk_ro_msgs":
+                    tolkien_ro_curve = {
+                        "xs": list(xs),
+                        "ys": list(ys),
+                        "tick_len": list(tick_len),
+                    }
+
+                    cprint("SAVING Rank-order Tolkien Mixtral-Instruct stability to CACHE", "red")
+                    with open("tolkien_ro_curve_cache.json", "w") as f:
+                        json.dump(tolkien_ro_curve, f)
+
             else:
                 cs = [family_2_color[model_2_family(x)] for x in xs]
                 labs = [model_2_family(x) for x in xs]
 
                 axs[plt_i].bar(xs, ys, yerr=tick_len, color=cs, label=labs)
 
-
         axs[plt_i].set_ylim(min_y, max_y)
         axs[plt_i].set_xticklabels([x_label_map.get(m, m) for m in models], rotation=rotatation_x_labels, fontsize=xticks_fontsize)
-        axs[plt_i].set_yticklabels(map(lambda x: np.round(x, 1), axs[plt_i].get_yticks()), fontsize=yticks_fontsize)
+        axs[plt_i].set_yticklabels(map(lambda x: np.round(x, round_y_lab), axs[plt_i].get_yticks()), fontsize=yticks_fontsize)
 
         # axs[plt_i].set_title(experiment_dir.replace("sim_conv_", "").replace("_seeds", ""))
 
-        axs[plt_i].set_ylabel(metric + " stability (r)", fontsize=y_label_fontsize)
+        axs[plt_i].set_ylabel(y_label, fontsize=y_label_fontsize)
 
         if add_title:
             if "donation" in experiment_dir:
@@ -635,7 +852,7 @@ for plt_i, experiment_dir in enumerate(experiment_dirs):
             axs[plt_i].set_xlabel("Simulated conversation length (n)", fontsize=x_label_fontsize)
 
         if add_legend:
-            legend_without_duplicate_labels(axs[plt_i], loc="best", title="LLM families", legend_loc=legend_loc)
+            legend_without_duplicate_labels(axs[plt_i], loc="best", title=legend_title, legend_loc=legend_loc)
 
     else:
 
@@ -659,7 +876,7 @@ for plt_i, experiment_dir in enumerate(experiment_dirs):
             ys = [data[experiment_dir][model][msg][metric] for msg in seed_strings]
 
             # [n_msgs, context pairs, n_pop]
-            all_corrs = np.array([data[experiment_dir][model][msg]["All_Ipsative_corrs"] for msg in seed_strings])
+            all_corrs = np.array([data[experiment_dir][model][msg][all_ipsative_corrs_str] for msg in seed_strings])
             all_corrs = np.mean(all_corrs, axis=1)  # average over context pairs
             c2 = np.array([
                 st.t.interval(confidence, len(msg_corrs) - 1, loc=np.mean(msg_corrs), scale=st.sem(msg_corrs))[1] for msg_corrs in all_corrs
@@ -680,13 +897,13 @@ for plt_i, experiment_dir in enumerate(experiment_dirs):
         max_y = 0.8 if metric == "Rank-Order" else 1.0
         axs[plt_i].set_ylim(-0.1, max_y)
         axs[plt_i].set_xlim(0, len(seed_strings) - 1)
-        axs[plt_i].set_ylabel(metric + " stability (r)", fontsize=y_label_fontsize)
+        axs[plt_i].set_ylabel(y_label, fontsize=y_label_fontsize)
         axs[plt_i].set_xlabel("Simulated conversation length (n)", fontsize=x_label_fontsize)
         # axs[plt_i].set_title(experiment_dir.replace("sim_conv_", "").replace("_seeds", ""))
         axs[plt_i].legend(bbox_to_anchor=(1.04, 1), loc="best", fontsize=legend_fontsize)
 
         axs[plt_i].set_xticklabels(axs[plt_i].get_xticks(), rotation=rotatation_x_labels, fontsize=xticks_fontsize)
-        axs[plt_i].set_yticklabels(map(lambda  x: np.round(x,1 ), axs[plt_i].get_yticks()), fontsize=yticks_fontsize)
+        axs[plt_i].set_yticklabels(map(lambda  x: np.round(x, round_y_lab), axs[plt_i].get_yticks()), fontsize=yticks_fontsize)
 
         plt.subplots_adjust(left=0.1, top=0.95, bottom=0.2, hspace=0.8)
 
diff --git a/evaluate.py b/evaluate.py
index 559fb28..c21a987 100644
--- a/evaluate.py
+++ b/evaluate.py
@@ -1273,6 +1273,7 @@ def remove_prefix(s, pref):
 def main(args):
     engine = args.engine
     print("Engine:", engine)
+
     subjects = sorted([f.split("_test.csv")[0] for f in os.listdir(os.path.join(args.data_dir, "test")) if "_test.csv" in f])
 
     # dump results dir
@@ -1513,6 +1514,7 @@ def main(args):
 
             num_questions = len(test_df_dict["F"])
             assert max_n_options == test_df_dict["F"].shape[1] - 2
+            n_options = [max_n_options] * num_questions
 
         else:
 
diff --git a/neurips_correlations.py b/neurips_correlations.py
deleted file mode 100644
index fc3f10e..0000000
--- a/neurips_correlations.py
+++ /dev/null
@@ -1,99 +0,0 @@
-# to be run after nerurips_correlations.sh
-import matplotlib.pyplot as plt
-from collections import defaultdict
-import csv
-import numpy as np
-import pandas as pd
-
-import numpy as np
-
-fontsize = 18
-legend = False
-max_x = 0
-
-def flat_and_remove_nan(d):
-    d=d.flatten()
-    d=d[~np.isnan(d)]
-    return d
-
-plot_data = []
-cols = []
-# variance data
-filenames = ['corr_variance.csv', 'corr_controllability.csv']
-
-questionnaire_dict = {
-    # "PVQ": "o",
-    # "Hof": "o", #"x",
-    # "Big5": "o", #"d"
-}
-
-questionnaire_legends = defaultdict(list)
-
-for questionnaire in questionnaire_dict.keys():
-    model_data = defaultdict(list)
-
-    for filename in filenames:
-
-        data_pd = pd.read_csv(filename, header=None)
-
-        for ind, row in data_pd.iterrows():
-            model_name = row[0].lstrip().rstrip()
-
-            quest_ind = {
-                "PVQ": 0,
-                "Hof": 1,
-                "Big5": 2,
-            }
-            start_ind = 1+4*quest_ind[questionnaire]
-            end_ind = start_ind + 4
-
-            data_row_pf = row[start_ind:end_ind]
-            # data_row_pf = row[1:]  # all questionnaires at once
-
-            data = pd.to_numeric(data_row_pf, errors='coerce').to_numpy(float)
-            data = flat_and_remove_nan(data)
-
-            if "variance" in filename:
-                data /= 10**3
-
-            model_data[model_name].append(data)
-
-
-    colors_dict = {
-        # "GPT-3.5": "green",
-        # "OA": "blue",
-        # "StVicuna": "red",
-        # "StLM": "pink",
-        "" : ""
-    }
-
-    for model_name, data in model_data.items():
-        max_x = max([max_x, max(data[0])])
-        scatter_plot = plt.scatter(*data, label=model_name, c=colors_dict.get(model_name, "black"), marker=questionnaire_dict[questionnaire])
-        if legend:
-            questionnaire_legends[questionnaire].append(scatter_plot)
-
-if legend:
-    for i, (questionnaire, legend_handles) in enumerate(questionnaire_legends.items()):
-        leg = plt.legend(handles=legend_handles, loc="upper right", bbox_to_anchor=(1, 1-0.3*i), fontsize=fontsize)
-        plt.gca().add_artist(leg)
-
-plt.ylabel("Correspondence", fontsize=fontsize)
-plt.xlabel("Permutation Variance", fontsize=fontsize)
-
-plt.tick_params(axis='both', which='major', labelsize=fontsize)
-
-# plt.ylim(-0.05, 0.8)
-# xmax={
-#     "PVQ": "0.12",
-#     "Hof": "0.5",  # "x",
-#     "Big5": "0.5", #"d"
-# }
-# plt.xlim(0, 0.12)
-
-# plt.show()
-quest_names = "_".join(questionnaire_dict.keys())
-filename =f"visualizations/neurips_plots/coh_var_{quest_names}.png"
-plt.savefig(filename)
-print(f"saved to {filename}")
-# plt.show()
diff --git a/neurips_correlations.sh b/neurips_correlations.sh
deleted file mode 100644
index b6c2823..0000000
--- a/neurips_correlations.sh
+++ /dev/null
@@ -1,418 +0,0 @@
-
-extract_contr_value() {
-    local input_string="$1"
-    local value=$(echo "$input_string" | grep "Mean primary value alignment" | grep -o -E '[-]*[0-9]+\.[0-9]*')
-    echo "$value"
-}
-
-extract_var_value() {
-    local input_string="$1"
-    local value=$(echo "$input_string" | grep "Permutation Var - mean (over values/traits x perspectives) of var (over perm) (\*10\^3)" | grep -o -E '[-]*[0-9]+\.[0-9]*')
-    echo "$value"
-}
-
-##### Controllability
-#exec > >(tee corr_controllability.csv)
-
-### InstructGPT
-## PVQ
-pvq_resU2=`python visualization_scripts/bar_viz.py results_neurips/results_nat_lang_prof_pvq_test_gpt-3.5-turbo-instruct-0914_perm_50_User_msg_2nd_prs/* --save --filename neurips_plots/pvq_gpti_50_U2`
-pvq_resU3=`python visualization_scripts/bar_viz.py results_neurips/results_nat_lang_prof_pvq_test_gpt-3.5-turbo-instruct-0914_perm_50_User_msg_3rd_prs/* --save --filename neurips_plots/pvq_gpti_50_U3`
-#
-## Hofstede
-hof_resU2=`python visualization_scripts/bar_viz.py results_neurips/results_nat_lang_prof_hofstede_test_gpt-3.5-turbo-instruct-0914_perm_50_User_msg_2nd_prs/* --save --filename neurips_plots/hof_gpti_50_U2`
-hof_resU3=`python visualization_scripts/bar_viz.py results_neurips/results_nat_lang_prof_hofstede_test_gpt-3.5-turbo-instruct-0914_perm_50_User_msg_3rd_prs/* --save --filename neurips_plots/hof_gpti_50_U3`
-#
-#
-## Big5_50
-big5_resU2=`python visualization_scripts/bar_viz.py results_neurips/results_nat_lang_prof_big5_test_gpt-3.5-turbo-instruct-0914_perm_50_User_msg_2nd_prs/* --save --filename neurips_plots/big5_gpti_50_U2`
-big5_resU3=`python visualization_scripts/bar_viz.py results_neurips/results_nat_lang_prof_big5_test_gpt-3.5-turbo-instruct-0914_perm_50_User_msg_3rd_prs/* --save --filename neurips_plots/big5_gpti_50_U3`
-#
-echo "GPT-3.5-turbo-instruct , $(extract_contr_value "$pvq_resS2") , $(extract_contr_value "$pvq_resS3") , $(extract_contr_value "$pvq_resU2") , $(extract_contr_value "$pvq_resU3") , $(extract_contr_value "$hof_resS2") , $(extract_contr_value "$hof_resS3") , $(extract_contr_value "$hof_resU2") , $(extract_contr_value "$hof_resU3") , $(extract_contr_value "$big5_resS2") , $(extract_contr_value "$big5_resS3") , $(extract_contr_value "$big5_resU2") , $(extract_contr_value "$big5_resU3")"
-
-exit
-###GPT4_50
-## PVQ
-#pvq_resS2=`python visualization_scripts/bar_viz.py results_neurips/results_nat_lang_prof_pvq_test_gpt-4-0314_perm_50_System_msg_2nd_prs/* --save --filename neurips_plots/pvq_gpt4_50_S2`
-pvq_resS3=`python visualization_scripts/bar_viz.py results_neurips/results_nat_lang_prof_pvq_test_gpt-4-0314_perm_50_System_msg_3rd_prs/* --save --filename neurips_plots/pvq_gpt4_50_S3`
-#pvq_resU2=`python visualization_scripts/bar_viz.py results_neurips/results_nat_lang_prof_pvq_test_gpt-4-0314_perm_50_User_msg_2nd_prs/* --save --filename neurips_plots/pvq_gpt4_50_U2`
-#pvq_resU3=`python visualization_scripts/bar_viz.py results_neurips/results_nat_lang_prof_pvq_test_gpt-4-0314_perm_50_User_msg_3rd_prs/* --save --filename neurips_plots/pvq_gpt4_50_U3`
-#
-## Hofstede
-#hof_resS2=`python visualization_scripts/bar_viz.py results_neurips/results_nat_lang_prof_hofstede_test_gpt-4-0314_perm_50_System_msg_2nd_prs/* --save --filename neurips_plots/hof_gpt4_50_S2`
-#hof_resS3=`python visualization_scripts/bar_viz.py results_neurips/results_nat_lang_prof_hofstede_test_gpt-4-0314_perm_50_System_msg_3rd_prs/* --save --filename neurips_plots/hof_gpt4_50_S3`
-#hof_resU2=`python visualization_scripts/bar_viz.py results_neurips/results_nat_lang_prof_hofstede_test_gpt-4-0314_perm_50_User_msg_2nd_prs/* --save --filename neurips_plots/hof_gpt4_50_U2`
-hof_resU3=`python visualization_scripts/bar_viz.py results_neurips/results_nat_lang_prof_hofstede_test_gpt-4-0314_perm_50_User_msg_3rd_prs/* --save --filename neurips_plots/hof_gpt4_50_U3`
-#
-#
-## Big5_50
-#big5_resS2=`python visualization_scripts/bar_viz.py results_neurips/results_nat_lang_prof_big5_test_gpt-4-0314_perm_50_System_msg_2nd_prs/* --save --filename neurips_plots/big5_gpt4_50_S2`
-#big5_resS3=`python visualization_scripts/bar_viz.py results_neurips/results_nat_lang_prof_big5_test_gpt-4-0314_perm_50_System_msg_3rd_prs/* --save --filename neurips_plots/big5_gpt4_50_S3`
-#big5_resU2=`python visualization_scripts/bar_viz.py results_neurips/results_nat_lang_prof_big5_test_gpt-4-0314_perm_50_User_msg_2nd_prs/* --save --filename neurips_plots/big5_gpt4_50_U2`
-big5_resU3=`python visualization_scripts/bar_viz.py results_neurips/results_nat_lang_prof_big5_test_gpt-4-0314_perm_50_User_msg_3rd_prs/* --save --filename neurips_plots/big5_gpt4_50_U3`
-#
-#echo "GPT-4 , $(extract_contr_value "$pvq_resS2") , $(extract_contr_value "$pvq_resS3") , $(extract_contr_value "$pvq_resU2") , $(extract_contr_value "$pvq_resU3") , $(extract_contr_value "$hof_resS2") , $(extract_contr_value "$hof_resS3") , $(extract_contr_value "$hof_resU2") , $(extract_contr_value "$hof_resU3") , $(extract_contr_value "$big5_resS2") , $(extract_contr_value "$big5_resS3") , $(extract_contr_value "$big5_resU2") , $(extract_contr_value "$big5_resU3")"
-
-### GPT35_50
-## PVQ
-#pvq_resS2=`python visualization_scripts/bar_viz.py results_neurips/results_nat_lang_prof_pvq_test_gpt-3.5-turbo-0301_perm_50_System_msg_2nd_prs/* --save --filename neurips_plots/pvq_gpt35_50_S2`
-#pvq_resS3=`python visualization_scripts/bar_viz.py results_neurips/results_nat_lang_prof_pvq_test_gpt-3.5-turbo-0301_perm_50_System_msg_3rd_prs/* --save --filename neurips_plots/pvq_gpt35_50_S3`
-#pvq_resU2=`python visualization_scripts/bar_viz.py results_neurips/results_nat_lang_prof_pvq_test_gpt-3.5-turbo-0301_perm_50_User_msg_2nd_prs/* --save --filename neurips_plots/pvq_gpt35_50_U2`
-#pvq_resU3=`python visualization_scripts/bar_viz.py results_neurips/results_nat_lang_prof_pvq_test_gpt-3.5-turbo-0301_perm_50_User_msg_3rd_prs/* --save --filename neurips_plots/pvq_gpt35_50_U3`
-#
-## Hofstede
-#hof_resS2=`python visualization_scripts/bar_viz.py results_neurips/results_nat_lang_prof_hofstede_test_gpt-3.5-turbo-0301_perm_50_System_msg_2nd_prs/* --save --filename neurips_plots/hof_gpt35_50_S2`
-#hof_resS3=`python visualization_scripts/bar_viz.py results_neurips/results_nat_lang_prof_hofstede_test_gpt-3.5-turbo-0301_perm_50_System_msg_3rd_prs/* --save --filename neurips_plots/hof_gpt35_50_S3`
-#hof_resU2=`python visualization_scripts/bar_viz.py results_neurips/results_nat_lang_prof_hofstede_test_gpt-3.5-turbo-0301_perm_50_User_msg_2nd_prs/* --save --filename neurips_plots/hof_gpt35_50_U2`
-#hof_resU3=`python visualization_scripts/bar_viz.py results_neurips/results_nat_lang_prof_hofstede_test_gpt-3.5-turbo-0301_perm_50_User_msg_3rd_prs/* --save --filename neurips_plots/hof_gpt35_50_U3`
-#
-#
-## Big5_50
-#big5_resS2=`python visualization_scripts/bar_viz.py results_neurips/results_nat_lang_prof_big5_test_gpt-3.5-turbo-0301_perm_50_System_msg_2nd_prs/* --save --filename neurips_plots/big5_gpt35_50_S2`
-#big5_resS3=`python visualization_scripts/bar_viz.py results_neurips/results_nat_lang_prof_big5_test_gpt-3.5-turbo-0301_perm_50_System_msg_3rd_prs/* --save --filename neurips_plots/big5_gpt35_50_S3`
-#big5_resU2=`python visualization_scripts/bar_viz.py results_neurips/results_nat_lang_prof_big5_test_gpt-3.5-turbo-0301_perm_50_User_msg_2nd_prs/* --save --filename neurips_plots/big5_gpt35_50_U2`
-#big5_resU3=`python visualization_scripts/bar_viz.py results_neurips/results_nat_lang_prof_big5_test_gpt-3.5-turbo-0301_perm_50_User_msg_3rd_prs/* --save --filename neurips_plots/big5_gpt35_50_U3`
-#
-#echo "GPT-3.5  , $(extract_contr_value "$pvq_resS2") , $(extract_contr_value "$pvq_resS3") , $(extract_contr_value "$pvq_resU2") , $(extract_contr_value "$pvq_resU3") , $(extract_contr_value "$hof_resS2") , $(extract_contr_value "$hof_resS3") , $(extract_contr_value "$hof_resU2") , $(extract_contr_value "$hof_resU3") , $(extract_contr_value "$big5_resS2") , $(extract_contr_value "$big5_resS3") , $(extract_contr_value "$big5_resU2") , $(extract_contr_value "$big5_resU3")"
-#
-#
-### OpenAssistant
-## PVQ
-#pvq_resS2=`python visualization_scripts/bar_viz.py results_neurips/results_nat_lang_prof_pvq_test_openassistant_rlhf2_llama30b_perm_50_System_msg_2nd_prs/* --save --filename neurips_plots/pvq_oa_50_S2`
-#pvq_resS3=`python visualization_scripts/bar_viz.py results_neurips/results_nat_lang_prof_pvq_test_openassistant_rlhf2_llama30b_perm_50_System_msg_3rd_prs/* --save --filename neurips_plots/pvq_oa_50_S3`
-#pvq_resU2=`python visualization_scripts/bar_viz.py results_neurips/results_nat_lang_prof_pvq_test_openassistant_rlhf2_llama30b_perm_50_User_msg_2nd_prs/* --save --filename neurips_plots/pvq_oa_50_U2`
-#pvq_resU3=`python visualization_scripts/bar_viz.py results_neurips/results_nat_lang_prof_pvq_test_openassistant_rlhf2_llama30b_perm_50_User_msg_3rd_prs/* --save --filename neurips_plots/pvq_oa_50_U3`
-#
-#
-## Hofstede
-#hof_resS2=`python visualization_scripts/bar_viz.py results_neurips/results_nat_lang_prof_hofstede_test_openassistant_rlhf2_llama30b_perm_50_System_msg_2nd_prs/* --save --filename neurips_plots/hof_oa_50_S2`
-#hof_resS3=`python visualization_scripts/bar_viz.py results_neurips/results_nat_lang_prof_hofstede_test_openassistant_rlhf2_llama30b_perm_50_System_msg_3rd_prs/* --save --filename neurips_plots/hof_oa_50_S3`
-#hof_resU2=`python visualization_scripts/bar_viz.py results_neurips/results_nat_lang_prof_hofstede_test_openassistant_rlhf2_llama30b_perm_50_User_msg_2nd_prs/* --save --filename neurips_plots/hof_oa_50_U2`
-#hof_resU3=`python visualization_scripts/bar_viz.py results_neurips/results_nat_lang_prof_hofstede_test_openassistant_rlhf2_llama30b_perm_50_User_msg_3rd_prs/* --save --filename neurips_plots/hof_oa_50_U3`
-#
-#
-## Big5_5
-#big5_resS2=`python visualization_scripts/bar_viz.py results_neurips/results_nat_lang_prof_big5_test_openassistant_rlhf2_llama30b_perm_50_System_msg_2nd_prs/* --save --filename neurips_plots/big5_oa_50_S2`
-#big5_resS3=`python visualization_scripts/bar_viz.py results_neurips/results_nat_lang_prof_big5_test_openassistant_rlhf2_llama30b_perm_50_System_msg_3rd_prs/* --save --filename neurips_plots/big5_oa_50_S3`
-#big5_resU2=`python visualization_scripts/bar_viz.py results_neurips/results_nat_lang_prof_big5_test_openassistant_rlhf2_llama30b_perm_50_User_msg_2nd_prs/* --save --filename neurips_plots/big5_oa_50_U2`
-#big5_resU3=`python visualization_scripts/bar_viz.py results_neurips/results_nat_lang_prof_big5_test_openassistant_rlhf2_llama30b_perm_50_User_msg_3rd_prs/* --save --filename neurips_plots/big5_oa_50_U3`
-#
-#echo "OA  , $(extract_contr_value "$pvq_resS2") , $(extract_contr_value "$pvq_resS3") , $(extract_contr_value "$pvq_resU2") , $(extract_contr_value "$pvq_resU3") , $(extract_contr_value "$hof_resS2") , $(extract_contr_value "$hof_resS3") , $(extract_contr_value "$hof_resU2") , $(extract_contr_value "$hof_resU3") , $(extract_contr_value "$big5_resS2") , $(extract_contr_value "$big5_resS3") , $(extract_contr_value "$big5_resU2") , $(extract_contr_value "$big5_resU3") "
-#
-#### StableVicuna
-## PVQ
-#pvq_resU2=`python visualization_scripts/bar_viz.py results_neurips/results_nat_lang_prof_pvq_test_stablevicuna_perm_50_User_msg_2nd_prs/* --save --filename neurips_plots/pvq_sv_50_U2`
-#pvq_resU3=`python visualization_scripts/bar_viz.py results_neurips/results_nat_lang_prof_pvq_test_stablevicuna_perm_50_User_msg_3rd_prs/* --save --filename neurips_plots/pvq_sv_50_U3`
-#
-## Hofstede
-#hof_resU2=`python visualization_scripts/bar_viz.py results_neurips/results_nat_lang_prof_hofstede_test_stablevicuna_perm_50_User_msg_2nd_prs/* --save --filename neurips_plots/hof_sv_50_U2`
-#hof_resU3=`python visualization_scripts/bar_viz.py results_neurips/results_nat_lang_prof_hofstede_test_stablevicuna_perm_50_User_msg_3rd_prs/* --save --filename neurips_plots/hof_sv_50_U3`
-#
-## Big5_5
-#big5_resU2=`python visualization_scripts/bar_viz.py results_neurips/results_nat_lang_prof_big5_test_stablevicuna_perm_50_User_msg_2nd_prs/* --save --filename neurips_plots/big5_sv_50_U2`
-#big5_resU3=`python visualization_scripts/bar_viz.py results_neurips/results_nat_lang_prof_big5_test_stablevicuna_perm_50_User_msg_3rd_prs/* --save --filename neurips_plots/big5_sv_50_U3`
-#
-#echo "StVicuna  , n/a , n/a , $(extract_contr_value "$pvq_resU2") , $(extract_contr_value "$pvq_resU3") , n/a , n/a , $(extract_contr_value "$hof_resU2") , $(extract_contr_value "$hof_resU3") , n/a , n/a , $(extract_contr_value "$big5_resU2") , $(extract_contr_value "$big5_resU3") "
-#
-### StableLM
-## PVQ
-#pvq_resS2=`python visualization_scripts/bar_viz.py results_neurips/results_nat_lang_prof_pvq_test_stablelm_perm_50_System_msg_2nd_prs/* --save --filename neurips_plots/pvq_slm_50_S2`
-#pvq_resS3=`python visualization_scripts/bar_viz.py results_neurips/results_nat_lang_prof_pvq_test_stablelm_perm_50_System_msg_3rd_prs/* --save --filename neurips_plots/pvq_slm_50_S3`
-#pvq_resU2=`python visualization_scripts/bar_viz.py results_neurips/results_nat_lang_prof_pvq_test_stablelm_perm_50_User_msg_2nd_prs/* --save --filename neurips_plots/pvq_slm_50_U2`
-#pvq_resU3=`python visualization_scripts/bar_viz.py results_neurips/results_nat_lang_prof_pvq_test_stablelm_perm_50_User_msg_3rd_prs/* --save --filename neurips_plots/pvq_slm_50_U3`
-#
-## Hofstede
-#hof_resS2=`python visualization_scripts/bar_viz.py results_neurips/results_nat_lang_prof_hofstede_test_stablelm_perm_50_System_msg_2nd_prs/* --save --filename neurips_plots/hof_slm_50_S2`
-#hof_resS3=`python visualization_scripts/bar_viz.py results_neurips/results_nat_lang_prof_hofstede_test_stablelm_perm_50_System_msg_3rd_prs/* --save --filename neurips_plots/hof_slm_50_S3`
-#hof_resU2=`python visualization_scripts/bar_viz.py results_neurips/results_nat_lang_prof_hofstede_test_stablelm_perm_50_User_msg_2nd_prs/* --save --filename neurips_plots/hof_slm_50_U2`
-#hof_resU3=`python visualization_scripts/bar_viz.py results_neurips/results_nat_lang_prof_hofstede_test_stablelm_perm_50_User_msg_3rd_prs/* --save --filename neurips_plots/hof_slm_50_U3`
-#
-#
-## Big5_5
-#big5_resS2=`python visualization_scripts/bar_viz.py results_neurips/results_nat_lang_prof_big5_test_stablelm_perm_50_System_msg_2nd_prs/* --save --filename neurips_plots/big5_slm_50_S2`
-#big5_resS3=`python visualization_scripts/bar_viz.py results_neurips/results_nat_lang_prof_big5_test_stablelm_perm_50_System_msg_3rd_prs/* --save --filename neurips_plots/big5_slm_50_S3`
-#big5_resU2=`python visualization_scripts/bar_viz.py results_neurips/results_nat_lang_prof_big5_test_stablelm_perm_50_User_msg_2nd_prs/* --save --filename neurips_plots/big5_slm_50_U2`
-#big5_resU3=`python visualization_scripts/bar_viz.py results_neurips/results_nat_lang_prof_big5_test_stablelm_perm_50_User_msg_3rd_prs/* --save --filename neurips_plots/big5_slm_50_U3`
-#
-#echo "StLM  , $(extract_contr_value "$pvq_resS2") , $(extract_contr_value "$pvq_resS3") , $(extract_contr_value "$pvq_resU2") , $(extract_contr_value "$pvq_resU3") , $(extract_contr_value "$hof_resS2") , $(extract_contr_value "$hof_resS3") , $(extract_contr_value "$hof_resU2") , $(extract_contr_value "$hof_resU3") , $(extract_contr_value "$big5_resS2") , $(extract_contr_value "$big5_resS3") , $(extract_contr_value "$big5_resU2") , $(extract_contr_value "$big5_resU3") "
-
-##### Variance
-#exec > >(tee corr_variance.csv)
-pvq_resS2=""
-pvq_resS3=""
-pvq_resU2=`python visualization_scripts/bar_viz.py results_neurips/results_nat_lang_prof_pvq_test_curie_perm_50_User_msg_2nd_prs/* --save --filename neurips_plots/pvq_gpt35j_50_U2`
-pvq_resU3=`python visualization_scripts/bar_viz.py results_neurips/results_nat_lang_prof_pvq_test_curie_perm_50_User_msg_3rd_prs/* --save --filename neurips_plots/pvq_gpt35j_50_U3`
-
-# Hofstede
-hof_resS2=""
-hof_resS3=""
-hof_resU2=`python visualization_scripts/bar_viz.py results_neurips/results_nat_lang_prof_hofstede_test_curie_perm_50_User_msg_2nd_prs/* --save --filename neurips_plots/hof_gpt35j_50_U2`
-hof_resU3=`python visualization_scripts/bar_viz.py results_neurips/results_nat_lang_prof_hofstede_test_curie_perm_50_User_msg_3rd_prs/* --save --filename neurips_plots/hof_gpt35j_50_U3`
-
-
-# Big5_50
-big5_resS2=""
-big5_resS3=""
-big5_resU2=`python visualization_scripts/bar_viz.py results_neurips/results_nat_lang_prof_big5_test_curie_perm_50_User_msg_2nd_prs/* --save --filename neurips_plots/big5_gpt35j_50_U2`
-big5_resU3=`python visualization_scripts/bar_viz.py results_neurips/results_nat_lang_prof_big5_test_curie_perm_50_User_msg_3rd_prs/* --save --filename neurips_plots/big5_gpt35j_50_U3`
-#
-##echo "Curie   & $(extract_var_value "$pvq_resS2") \textbar $(extract_var_value "$pvq_resS3") & $(extract_var_value "$pvq_resU2") \textbar $(extract_var_value "$pvq_resU3") & $(extract_var_value "$hof_resS2") \textbar $(extract_var_value "$hof_resS3") & $(extract_var_value "$hof_resU2") \textbar $(extract_var_value "$hof_resU3") & $(extract_var_value "$big5_resS2") \textbar $(extract_var_value "$big5_resS3") & $(extract_var_value "$big5_resU2") \textbar $(extract_var_value "$big5_resU3") "
-#echo "Curie   , $(extract_var_value "$pvq_resS2") , $(extract_var_value "$pvq_resS3") , $(extract_var_value "$pvq_resU2") , $(extract_var_value "$pvq_resU3") , $(extract_var_value "$hof_resS2") , $(extract_var_value "$hof_resS3") , $(extract_var_value "$hof_resU2") , $(extract_var_value "$hof_resU3") , $(extract_var_value "$big5_resS2") , $(extract_var_value "$big5_resS3") , $(extract_var_value "$big5_resU2") , $(extract_var_value "$big5_resU3") "
-#
-pvq_resS2=""
-pvq_resS3=""
-pvq_resU2=`python visualization_scripts/bar_viz.py results_neurips/results_nat_lang_prof_pvq_test_babbage_perm_50_User_msg_2nd_prs/* --save --filename neurips_plots/pvq_gpt35j_50_U2`
-pvq_resU3=`python visualization_scripts/bar_viz.py results_neurips/results_nat_lang_prof_pvq_test_babbage_perm_50_User_msg_3rd_prs/* --save --filename neurips_plots/pvq_gpt35j_50_U3`
-
-# Hofstede
-hof_resS2=""
-hof_resS3=""
-hof_resU2=`python visualization_scripts/bar_viz.py results_neurips/results_nat_lang_prof_hofstede_test_babbage_perm_50_User_msg_2nd_prs/* --save --filename neurips_plots/hof_gpt35j_50_U2`
-hof_resU3=`python visualization_scripts/bar_viz.py results_neurips/results_nat_lang_prof_hofstede_test_babbage_perm_50_User_msg_3rd_prs/* --save --filename neurips_plots/hof_gpt35j_50_U3`
-
-
-# Big5_50
-big5_resS2=""
-big5_resS3=""
-big5_resU2=`python visualization_scripts/bar_viz.py results_neurips/results_nat_lang_prof_big5_test_babbage_perm_50_User_msg_2nd_prs/* --save --filename neurips_plots/big5_gpt35j_50_U2`
-big5_resU3=`python visualization_scripts/bar_viz.py results_neurips/results_nat_lang_prof_big5_test_babbage_perm_50_User_msg_3rd_prs/* --save --filename neurips_plots/big5_gpt35j_50_U3`
-#
-##echo "Babbage   & $(extract_var_value "$pvq_resS2") \textbar $(extract_var_value "$pvq_resS3") & $(extract_var_value "$pvq_resU2") \textbar $(extract_var_value "$pvq_resU3") & $(extract_var_value "$hof_resS2") \textbar $(extract_var_value "$hof_resS3") & $(extract_var_value "$hof_resU2") \textbar $(extract_var_value "$hof_resU3") & $(extract_var_value "$big5_resS2") \textbar $(extract_var_value "$big5_resS3") & $(extract_var_value "$big5_resU2") \textbar $(extract_var_value "$big5_resU3") "
-#echo "Babbage   , $(extract_var_value "$pvq_resS2") , $(extract_var_value "$pvq_resS3") , $(extract_var_value "$pvq_resU2") , $(extract_var_value "$pvq_resU3") , $(extract_var_value "$hof_resS2") , $(extract_var_value "$hof_resS3") , $(extract_var_value "$hof_resU2") , $(extract_var_value "$hof_resU3") , $(extract_var_value "$big5_resS2") , $(extract_var_value "$big5_resS3") , $(extract_var_value "$big5_resU2") , $(extract_var_value "$big5_resU3") "
-#
-pvq_resS2=""
-pvq_resS3=""
-pvq_resU2=`python visualization_scripts/bar_viz.py results_neurips/results_nat_lang_prof_pvq_test_ada_perm_50_User_msg_2nd_prs/* --save --filename neurips_plots/pvq_gpt35j_50_U2`
-pvq_resU3=`python visualization_scripts/bar_viz.py results_neurips/results_nat_lang_prof_pvq_test_ada_perm_50_User_msg_3rd_prs/* --save --filename neurips_plots/pvq_gpt35j_50_U3`
-
-# Hofstede
-hof_resS2=""
-hof_resS3=""
-hof_resU2=`python visualization_scripts/bar_viz.py results_neurips/results_nat_lang_prof_hofstede_test_ada_perm_50_User_msg_2nd_prs/* --save --filename neurips_plots/hof_gpt35j_50_U2`
-hof_resU3=`python visualization_scripts/bar_viz.py results_neurips/results_nat_lang_prof_hofstede_test_ada_perm_50_User_msg_3rd_prs/* --save --filename neurips_plots/hof_gpt35j_50_U3`
-
-
-# Big5_50
-big5_resS2=""
-big5_resS3=""
-big5_resU2=`python visualization_scripts/bar_viz.py results_neurips/results_nat_lang_prof_big5_test_ada_perm_50_User_msg_2nd_prs/* --save --filename neurips_plots/big5_gpt35j_50_U2`
-big5_resU3=`python visualization_scripts/bar_viz.py results_neurips/results_nat_lang_prof_big5_test_ada_perm_50_User_msg_3rd_prs/* --save --filename neurips_plots/big5_gpt35j_50_U3`
-#
-##echo "Ada   & $(extract_var_value "$pvq_resS2") \textbar $(extract_var_value "$pvq_resS3") & $(extract_var_value "$pvq_resU2") \textbar $(extract_var_value "$pvq_resU3") & $(extract_var_value "$hof_resS2") \textbar $(extract_var_value "$hof_resS3") & $(extract_var_value "$hof_resU2") \textbar $(extract_var_value "$hof_resU3") & $(extract_var_value "$big5_resS2") \textbar $(extract_var_value "$big5_resS3") & $(extract_var_value "$big5_resU2") \textbar $(extract_var_value "$big5_resU3") "
-#echo "Ada   , $(extract_var_value "$pvq_resS2") , $(extract_var_value "$pvq_resS3") , $(extract_var_value "$pvq_resU2") , $(extract_var_value "$pvq_resU3") , $(extract_var_value "$hof_resS2") , $(extract_var_value "$hof_resS3") , $(extract_var_value "$hof_resU2") , $(extract_var_value "$hof_resU3") , $(extract_var_value "$big5_resS2") , $(extract_var_value "$big5_resS3") , $(extract_var_value "$big5_resU2") , $(extract_var_value "$big5_resU3") "
-#
-## upstage rp-incite-instruct
-pvq_resS2=""
-pvq_resS3=""
-pvq_resU2=`python visualization_scripts/bar_viz.py results_neurips/results_nat_lang_prof_pvq_test_rp_incite_7b_chat_perm_50_User_msg_2nd_prs/* --save --filename neurips_plots/pvq_gpt35j_50_U2`
-pvq_resU3=`python visualization_scripts/bar_viz.py results_neurips/results_nat_lang_prof_pvq_test_rp_incite_7b_chat_perm_50_User_msg_3rd_prs/* --save --filename neurips_plots/pvq_gpt35j_50_U3`
-
-# Hofstede
-hof_resS2=""
-hof_resS3=""
-hof_resU2=`python visualization_scripts/bar_viz.py results_neurips/results_nat_lang_prof_hofstede_test_rp_incite_7b_chat_perm_50_User_msg_2nd_prs/* --save --filename neurips_plots/hof_gpt35j_50_U2`
-hof_resU3=`python visualization_scripts/bar_viz.py results_neurips/results_nat_lang_prof_hofstede_test_rp_incite_7b_chat_perm_50_User_msg_3rd_prs/* --save --filename neurips_plots/hof_gpt35j_50_U3`
-
-
-# Big5_50
-big5_resS2=""
-big5_resS3=""
-big5_resU2=`python visualization_scripts/bar_viz.py results_neurips/results_nat_lang_prof_big5_test_rp_incite_7b_chat_perm_50_User_msg_2nd_prs/* --save --filename neurips_plots/big5_gpt35j_50_U2`
-big5_resU3=`python visualization_scripts/bar_viz.py results_neurips/results_nat_lang_prof_big5_test_rp_incite_7b_chat_perm_50_User_msg_3rd_prs/* --save --filename neurips_plots/big5_gpt35j_50_U3`
-#
-##echo "Redpaj-incite-chat   & $(extract_var_value "$pvq_resS2") \textbar $(extract_var_value "$pvq_resS3") & $(extract_var_value "$pvq_resU2") \textbar $(extract_var_value "$pvq_resU3") & $(extract_var_value "$hof_resS2") \textbar $(extract_var_value "$hof_resS3") & $(extract_var_value "$hof_resU2") \textbar $(extract_var_value "$hof_resU3") & $(extract_var_value "$big5_resS2") \textbar $(extract_var_value "$big5_resS3") & $(extract_var_value "$big5_resU2") \textbar $(extract_var_value "$big5_resU3") "
-#echo "Redpaj-incite-chat   , $(extract_var_value "$pvq_resS2") , $(extract_var_value "$pvq_resS3") , $(extract_var_value "$pvq_resU2") , $(extract_var_value "$pvq_resU3") , $(extract_var_value "$hof_resS2") , $(extract_var_value "$hof_resS3") , $(extract_var_value "$hof_resU2") , $(extract_var_value "$hof_resU3") , $(extract_var_value "$big5_resS2") , $(extract_var_value "$big5_resS3") , $(extract_var_value "$big5_resU2") , $(extract_var_value "$big5_resU3") "
-## redpajama-incite-instruct
-pvq_resS2=""
-pvq_resS3=""
-pvq_resU2=`python visualization_scripts/bar_viz.py results_neurips/results_nat_lang_prof_pvq_test_rp_incite_7b_instruct_perm_50_User_msg_2nd_prs/* --save --filename neurips_plots/pvq_rpinst_50_U2`
-pvq_resU3=`python visualization_scripts/bar_viz.py results_neurips/results_nat_lang_prof_pvq_test_rp_incite_7b_instruct_perm_50_User_msg_3rd_prs/* --save --filename neurips_plots/pvq_rpinst_50_U3`
-
-# Hofstede
-hof_resS2=""
-hof_resS3=""
-hof_resU2=`python visualization_scripts/bar_viz.py results_neurips/results_nat_lang_prof_hofstede_test_rp_incite_7b_instruct_perm_50_User_msg_2nd_prs/* --save --filename neurips_plots/hof_rpinst_50_U2`
-hof_resU3=`python visualization_scripts/bar_viz.py results_neurips/results_nat_lang_prof_hofstede_test_rp_incite_7b_instruct_perm_50_User_msg_3rd_prs/* --save --filename neurips_plots/hof_rpinst_50_U3`
-
-
-# Big5_50
-big5_resS2=""
-big5_resS3=""
-big5_resU2=`python visualization_scripts/bar_viz.py results_neurips/results_nat_lang_prof_big5_test_rp_incite_7b_instruct_perm_50_User_msg_2nd_prs/* --save --filename neurips_plots/big5_rpinst_50_U2`
-big5_resU3=`python visualization_scripts/bar_viz.py results_neurips/results_nat_lang_prof_big5_test_rp_incite_7b_instruct_perm_50_User_msg_3rd_prs/* --save --filename neurips_plots/big5_rpinst_50_U3`
-#
-##echo "Redpaj-incite-instruct   & $(extract_var_value "$pvq_resS2") \textbar $(extract_var_value "$pvq_resS3") & $(extract_var_value "$pvq_resU2") \textbar $(extract_var_value "$pvq_resU3") & $(extract_var_value "$hof_resS2") \textbar $(extract_var_value "$hof_resS3") & $(extract_var_value "$hof_resU2") \textbar $(extract_var_value "$hof_resU3") & $(extract_var_value "$big5_resS2") \textbar $(extract_var_value "$big5_resS3") & $(extract_var_value "$big5_resU2") \textbar $(extract_var_value "$big5_resU3") "
-#echo "Redpaj-incite-instruct   , $(extract_var_value "$pvq_resS2") , $(extract_var_value "$pvq_resS3") , $(extract_var_value "$pvq_resU2") , $(extract_var_value "$pvq_resU3") , $(extract_var_value "$hof_resS2") , $(extract_var_value "$hof_resS3") , $(extract_var_value "$hof_resU2") , $(extract_var_value "$hof_resU3") , $(extract_var_value "$big5_resS2") , $(extract_var_value "$big5_resS3") , $(extract_var_value "$big5_resU2") , $(extract_var_value "$big5_resU3") "
-## llama-65B
-pvq_resS2=""
-pvq_resS3=""
-pvq_resU2=`python visualization_scripts/bar_viz.py results_neurips/results_nat_lang_prof_pvq_test_llama_65B_perm_50__msg_2nd_prs/* --save --filename neurips_plots/pvq_llama_50_U2`
-pvq_resU3=`python visualization_scripts/bar_viz.py results_neurips/results_nat_lang_prof_pvq_test_llama_65B_perm_50__msg_3rd_prs/* --save --filename neurips_plots/pvq_llama_50_U3`
-
-# Hofstede
-hof_resS2=""
-hof_resS3=""
-hof_resU2=`python visualization_scripts/bar_viz.py results_neurips/results_nat_lang_prof_hof_test_llama_65B_perm_50__msg_2nd_prs/* --save --filename neurips_plots/hof_llama_50_U2`
-hof_resU3=`python visualization_scripts/bar_viz.py results_neurips/results_nat_lang_prof_hof_test_llama_65B_perm_50__msg_3rd_prs/* --save --filename neurips_plots/hof_llama_50_U3`
-
-
-# Big5_50
-big5_resS2=""
-big5_resS3=""
-big5_resU2=`python visualization_scripts/bar_viz.py results_neurips/results_nat_lang_prof_big5_test_llama_65B_perm_50__msg_2nd_prs/* --save --filename neurips_plots/big5_llama_50_U2`
-big5_resU3=`python visualization_scripts/bar_viz.py results_neurips/results_nat_lang_prof_big5_test_llama_65B_perm_50__msg_3rd_prs/* --save --filename neurips_plots/big5_llama_50_U3`
-#
-##echo "LLaMa-65B   & $(extract_var_value "$pvq_resS2") \textbar $(extract_var_value "$pvq_resS3") & $(extract_var_value "$pvq_resU2") \textbar $(extract_var_value "$pvq_resU3") & $(extract_var_value "$hof_resS2") \textbar $(extract_var_value "$hof_resS3") & $(extract_var_value "$hof_resU2") \textbar $(extract_var_value "$hof_resU3") & $(extract_var_value "$big5_resS2") \textbar $(extract_var_value "$big5_resS3") & $(extract_var_value "$big5_resU2") \textbar $(extract_var_value "$big5_resU3") "
-#echo "LLaMa-65B   , $(extract_var_value "$pvq_resS2") , $(extract_var_value "$pvq_resS3") , $(extract_var_value "$pvq_resU2") , $(extract_var_value "$pvq_resU3") , $(extract_var_value "$hof_resS2") , $(extract_var_value "$hof_resS3") , $(extract_var_value "$hof_resU2") , $(extract_var_value "$hof_resU3") , $(extract_var_value "$big5_resS2") , $(extract_var_value "$big5_resS3") , $(extract_var_value "$big5_resU2") , $(extract_var_value "$big5_resU3") "
-
-## upstage llama
-#
-pvq_resS2=`python visualization_scripts/bar_viz.py results_neurips/results_nat_lang_prof_pvq_test_up_llama_60b_instruct_perm_50_System_msg_2nd_prs/* --save --filename neurips_plots/pvq_upllama_50_S2`
-pvq_resS3=`python visualization_scripts/bar_viz.py results_neurips/results_nat_lang_prof_pvq_test_up_llama_60b_instruct_perm_50_System_msg_3rd_prs/* --save --filename neurips_plots/pvq_upllama_50_S3`
-pvq_resU2=`python visualization_scripts/bar_viz.py results_neurips/results_nat_lang_prof_pvq_test_up_llama_60b_instruct_perm_50_User_msg_2nd_prs/* --save --filename neurips_plots/pvq_upllama_50_U2`
-pvq_resU3=`python visualization_scripts/bar_viz.py results_neurips/results_nat_lang_prof_pvq_test_up_llama_60b_instruct_perm_50_User_msg_3rd_prs/* --save --filename neurips_plots/pvq_upllama_50_U3`
-
-# Hofstede
-hof_resS2=`python visualization_scripts/bar_viz.py results_neurips/results_nat_lang_prof_hofstede_test_up_llama_60b_instruct_perm_50_System_msg_2nd_prs/* --save --filename neurips_plots/hof_upllama_50_S2`
-hof_resS3=`python visualization_scripts/bar_viz.py results_neurips/results_nat_lang_prof_hofstede_test_up_llama_60b_instruct_perm_50_System_msg_3rd_prs/* --save --filename neurips_plots/hof_upllama_50_S3`
-hof_resU2=`python visualization_scripts/bar_viz.py results_neurips/results_nat_lang_prof_hofstede_test_up_llama_60b_instruct_perm_50_User_msg_2nd_prs/* --save --filename neurips_plots/hof_upllama_50_U2`
-hof_resU3=`python visualization_scripts/bar_viz.py results_neurips/results_nat_lang_prof_hofstede_test_up_llama_60b_instruct_perm_50_User_msg_3rd_prs/* --save --filename neurips_plots/hof_upllama_50_U3`
-
-
-# Big5_50
-big5_resS2=`python visualization_scripts/bar_viz.py results_neurips/results_nat_lang_prof_big5_test_up_llama_60b_instruct_perm_50_System_msg_2nd_prs/* --save --filename neurips_plots/big5_upllama_50_S2`
-big5_resS3=`python visualization_scripts/bar_viz.py results_neurips/results_nat_lang_prof_big5_test_up_llama_60b_instruct_perm_50_System_msg_3rd_prs/* --save --filename neurips_plots/big5_upllama_50_S3`
-big5_resU2=`python visualization_scripts/bar_viz.py results_neurips/results_nat_lang_prof_big5_test_up_llama_60b_instruct_perm_50_User_msg_2nd_prs/* --save --filename neurips_plots/big5_upllama_50_U2`
-big5_resU3=`python visualization_scripts/bar_viz.py results_neurips/results_nat_lang_prof_big5_test_up_llama_60b_instruct_perm_50_User_msg_3rd_prs/* --save --filename neurips_plots/big5_upllama_50_U3`
-#
-##echo "Upst-LLaMa-66B-instruct   & $(extract_var_value "$pvq_resS2") \textbar $(extract_var_value "$pvq_resS3") & $(extract_var_value "$pvq_resU2") \textbar $(extract_var_value "$pvq_resU3") & $(extract_var_value "$hof_resS2") \textbar $(extract_var_value "$hof_resS3") & $(extract_var_value "$hof_resU2") \textbar $(extract_var_value "$hof_resU3") & $(extract_var_value "$big5_resS2") \textbar $(extract_var_value "$big5_resS3") & $(extract_var_value "$big5_resU2") \textbar $(extract_var_value "$big5_resU3") "
-#echo "Upst-LLaMa-66B-instruct   , $(extract_var_value "$pvq_resS2") , $(extract_var_value "$pvq_resS3") , $(extract_var_value "$pvq_resU2") , $(extract_var_value "$pvq_resU3") , $(extract_var_value "$hof_resS2") , $(extract_var_value "$hof_resS3") , $(extract_var_value "$hof_resU2") , $(extract_var_value "$hof_resU3") , $(extract_var_value "$big5_resS2") , $(extract_var_value "$big5_resS3") , $(extract_var_value "$big5_resU2") , $(extract_var_value "$big5_resU3") "
-#
-## upstage llama2
-pvq_resS2=`python visualization_scripts/bar_viz.py results_neurips/results_nat_lang_prof_pvq_test_up_llama2_70b_instruct_v2_perm_50_System_msg_2nd_prs/* --save --filename neurips_plots/pvq_upllama2_50_S2`
-pvq_resS3=`python visualization_scripts/bar_viz.py results_neurips/results_nat_lang_prof_pvq_test_up_llama2_70b_instruct_v2_perm_50_System_msg_3rd_prs/* --save --filename neurips_plots/pvq_upllama2_50_S3`
-pvq_resU2=`python visualization_scripts/bar_viz.py results_neurips/results_nat_lang_prof_pvq_test_up_llama2_70b_instruct_v2_perm_50_User_msg_2nd_prs/* --save --filename neurips_plots/pvq_upllama2_50_U2`
-pvq_resU3=`python visualization_scripts/bar_viz.py results_neurips/results_nat_lang_prof_pvq_test_up_llama2_70b_instruct_v2_perm_50_User_msg_3rd_prs/* --save --filename neurips_plots/pvq_upllama2_50_U3`
-
-# Hofstede
-hof_resS2=`python visualization_scripts/bar_viz.py results_neurips/results_nat_lang_prof_hofstede_test_up_llama2_70b_instruct_v2_perm_50_System_msg_2nd_prs/* --save --filename neurips_plots/hof_upllama2_50_S2`
-hof_resS3=`python visualization_scripts/bar_viz.py results_neurips/results_nat_lang_prof_hofstede_test_up_llama2_70b_instruct_v2_perm_50_System_msg_3rd_prs/* --save --filename neurips_plots/hof_upllama2_50_S3`
-hof_resU2=`python visualization_scripts/bar_viz.py results_neurips/results_nat_lang_prof_hofstede_test_up_llama2_70b_instruct_v2_perm_50_User_msg_2nd_prs/* --save --filename neurips_plots/hof_upllama2_50_U2`
-hof_resU3=`python visualization_scripts/bar_viz.py results_neurips/results_nat_lang_prof_hofstede_test_up_llama2_70b_instruct_v2_perm_50_User_msg_3rd_prs/* --save --filename neurips_plots/hof_upllama2_50_U3`
-
-
-# Big5_50
-big5_resS2=`python visualization_scripts/bar_viz.py results_neurips/results_nat_lang_prof_big5_test_up_llama2_70b_instruct_v2_perm_50_System_msg_2nd_prs/* --save --filename neurips_plots/big5_upllama2_50_S2`
-big5_resS3=`python visualization_scripts/bar_viz.py results_neurips/results_nat_lang_prof_big5_test_up_llama2_70b_instruct_v2_perm_50_System_msg_3rd_prs/* --save --filename neurips_plots/big5_upllama2_50_S3`
-big5_resU2=`python visualization_scripts/bar_viz.py results_neurips/results_nat_lang_prof_big5_test_up_llama2_70b_instruct_v2_perm_50_User_msg_2nd_prs/* --save --filename neurips_plots/big5_upllama2_50_U2`
-big5_resU3=`python visualization_scripts/bar_viz.py results_neurips/results_nat_lang_prof_big5_test_up_llama2_70b_instruct_v2_perm_50_User_msg_3rd_prs/* --save --filename neurips_plots/big5_upllama2_50_U3`
-#
-##echo "Upst-LLaMa-2-70B-instruct   & $(extract_var_value "$pvq_resS2") \textbar $(extract_var_value "$pvq_resS3") & $(extract_var_value "$pvq_resU2") \textbar $(extract_var_value "$pvq_resU3") & $(extract_var_value "$hof_resS2") \textbar $(extract_var_value "$hof_resS3") & $(extract_var_value "$hof_resU2") \textbar $(extract_var_value "$hof_resU3") & $(extract_var_value "$big5_resS2") \textbar $(extract_var_value "$big5_resS3") & $(extract_var_value "$big5_resU2") \textbar $(extract_var_value "$big5_resU3") "
-#echo "Upst-LLaMa-2-70B-instruct   , $(extract_var_value "$pvq_resS2") , $(extract_var_value "$pvq_resS3") , $(extract_var_value "$pvq_resU2") , $(extract_var_value "$pvq_resU3") , $(extract_var_value "$hof_resS2") , $(extract_var_value "$hof_resS3") , $(extract_var_value "$hof_resU2") , $(extract_var_value "$hof_resU3") , $(extract_var_value "$big5_resS2") , $(extract_var_value "$big5_resS3") , $(extract_var_value "$big5_resU2") , $(extract_var_value "$big5_resU3") "
-#
-### GPT35_50 - june
-## PVQ
-pvq_resS2=`python visualization_scripts/bar_viz.py results_neurips/results_nat_lang_prof_pvq_test_gpt-3.5-turbo-0613_perm_50_System_msg_2nd_prs/* --save --filename neurips_plots/pvq_gpt35j_50_S2`
-pvq_resS3=`python visualization_scripts/bar_viz.py results_neurips/results_nat_lang_prof_pvq_test_gpt-3.5-turbo-0613_perm_50_System_msg_3rd_prs/* --save --filename neurips_plots/pvq_gpt35j_50_S3`
-pvq_resU2=`python visualization_scripts/bar_viz.py results_neurips/results_nat_lang_prof_pvq_test_gpt-3.5-turbo-0613_perm_50_User_msg_2nd_prs/* --save --filename neurips_plots/pvq_gpt35j_50_U2`
-pvq_resU3=`python visualization_scripts/bar_viz.py results_neurips/results_nat_lang_prof_pvq_test_gpt-3.5-turbo-0613_perm_50_User_msg_3rd_prs/* --save --filename neurips_plots/pvq_gpt35j_50_U3`
-
-# Hofstede
-hof_resS2=`python visualization_scripts/bar_viz.py results_neurips/results_nat_lang_prof_hofstede_test_gpt-3.5-turbo-0613_perm_50_System_msg_2nd_prs/* --save --filename neurips_plots/hof_gpt35j_50_S2`
-hof_resS3=`python visualization_scripts/bar_viz.py results_neurips/results_nat_lang_prof_hofstede_test_gpt-3.5-turbo-0613_perm_50_System_msg_3rd_prs/* --save --filename neurips_plots/hof_gpt35j_50_S3`
-hof_resU2=`python visualization_scripts/bar_viz.py results_neurips/results_nat_lang_prof_hofstede_test_gpt-3.5-turbo-0613_perm_50_User_msg_2nd_prs/* --save --filename neurips_plots/hof_gpt35j_50_U2`
-hof_resU3=`python visualization_scripts/bar_viz.py results_neurips/results_nat_lang_prof_hofstede_test_gpt-3.5-turbo-0613_perm_50_User_msg_3rd_prs/* --save --filename neurips_plots/hof_gpt35j_50_U3`
-
-
-# Big5_50
-big5_resS2=`python visualization_scripts/bar_viz.py results_neurips/results_nat_lang_prof_big5_test_gpt-3.5-turbo-0613_perm_50_System_msg_2nd_prs/* --save --filename neurips_plots/big5_gpt35j_50_S2`
-big5_resS3=`python visualization_scripts/bar_viz.py results_neurips/results_nat_lang_prof_big5_test_gpt-3.5-turbo-0613_perm_50_System_msg_3rd_prs/* --save --filename neurips_plots/big5_gpt35j_50_S3`
-big5_resU2=`python visualization_scripts/bar_viz.py results_neurips/results_nat_lang_prof_big5_test_gpt-3.5-turbo-0613_perm_50_User_msg_2nd_prs/* --save --filename neurips_plots/big5_gpt35j_50_U2`
-big5_resU3=`python visualization_scripts/bar_viz.py results_neurips/results_nat_lang_prof_big5_test_gpt-3.5-turbo-0613_perm_50_User_msg_3rd_prs/* --save --filename neurips_plots/big5_gpt35j_50_U3`
-#
-##echo "GPT-3.5-0613  & $(extract_var_value "$pvq_resS2") \textbar $(extract_var_value "$pvq_resS3") & $(extract_var_value "$pvq_resU2") \textbar $(extract_var_value "$pvq_resU3") & $(extract_var_value "$hof_resS2") \textbar $(extract_var_value "$hof_resS3") & $(extract_var_value "$hof_resU2") \textbar $(extract_var_value "$hof_resU3") & $(extract_var_value "$big5_resS2") \textbar $(extract_var_value "$big5_resS3") & $(extract_var_value "$big5_resU2") \textbar $(extract_var_value "$big5_resU3") "
-#echo "GPT-3.5-0613  , $(extract_var_value "$pvq_resS2") , $(extract_var_value "$pvq_resS3") , $(extract_var_value "$pvq_resU2") , $(extract_var_value "$pvq_resU3") , $(extract_var_value "$hof_resS2") , $(extract_var_value "$hof_resS3") , $(extract_var_value "$hof_resU2") , $(extract_var_value "$hof_resU3") , $(extract_var_value "$big5_resS2") , $(extract_var_value "$big5_resS3") , $(extract_var_value "$big5_resU2") , $(extract_var_value "$big5_resU3") "
-#
-### GPT35_50
-## PVQ
-pvq_resS2=`python visualization_scripts/bar_viz.py results_neurips/results_nat_lang_prof_pvq_test_gpt-3.5-turbo-0301_perm_50_System_msg_2nd_prs/* --save --filename neurips_plots/pvq_gpt35_50_S2`
-pvq_resS3=`python visualization_scripts/bar_viz.py results_neurips/results_nat_lang_prof_pvq_test_gpt-3.5-turbo-0301_perm_50_System_msg_3rd_prs/* --save --filename neurips_plots/pvq_gpt35_50_S3`
-pvq_resU2=`python visualization_scripts/bar_viz.py results_neurips/results_nat_lang_prof_pvq_test_gpt-3.5-turbo-0301_perm_50_User_msg_2nd_prs/* --save --filename neurips_plots/pvq_gpt35_50_U2`
-pvq_resU3=`python visualization_scripts/bar_viz.py results_neurips/results_nat_lang_prof_pvq_test_gpt-3.5-turbo-0301_perm_50_User_msg_3rd_prs/* --save --filename neurips_plots/pvq_gpt35_50_U3`
-
-# Hofstede
-hof_resS2=`python visualization_scripts/bar_viz.py results_neurips/results_nat_lang_prof_hofstede_test_gpt-3.5-turbo-0301_perm_50_System_msg_2nd_prs/* --save --filename neurips_plots/hof_gpt35_50_S2`
-hof_resS3=`python visualization_scripts/bar_viz.py results_neurips/results_nat_lang_prof_hofstede_test_gpt-3.5-turbo-0301_perm_50_System_msg_3rd_prs/* --save --filename neurips_plots/hof_gpt35_50_S3`
-hof_resU2=`python visualization_scripts/bar_viz.py results_neurips/results_nat_lang_prof_hofstede_test_gpt-3.5-turbo-0301_perm_50_User_msg_2nd_prs/* --save --filename neurips_plots/hof_gpt35_50_U2`
-hof_resU3=`python visualization_scripts/bar_viz.py results_neurips/results_nat_lang_prof_hofstede_test_gpt-3.5-turbo-0301_perm_50_User_msg_3rd_prs/* --save --filename neurips_plots/hof_gpt35_50_U3`
-
-
-# Big5_50
-big5_resS2=`python visualization_scripts/bar_viz.py results_neurips/results_nat_lang_prof_big5_test_gpt-3.5-turbo-0301_perm_50_System_msg_2nd_prs/* --save --filename neurips_plots/big5_gpt35_50_S2`
-big5_resS3=`python visualization_scripts/bar_viz.py results_neurips/results_nat_lang_prof_big5_test_gpt-3.5-turbo-0301_perm_50_System_msg_3rd_prs/* --save --filename neurips_plots/big5_gpt35_50_S3`
-big5_resU2=`python visualization_scripts/bar_viz.py results_neurips/results_nat_lang_prof_big5_test_gpt-3.5-turbo-0301_perm_50_User_msg_2nd_prs/* --save --filename neurips_plots/big5_gpt35_50_U2`
-big5_resU3=`python visualization_scripts/bar_viz.py results_neurips/results_nat_lang_prof_big5_test_gpt-3.5-turbo-0301_perm_50_User_msg_3rd_prs/* --save --filename neurips_plots/big5_gpt35_50_U3`
-
-#echo "GPT-3.5-0301  , $(extract_var_value "$pvq_resS2") , $(extract_var_value "$pvq_resS3") , $(extract_var_value "$pvq_resU2") , $(extract_var_value "$pvq_resU3") , $(extract_var_value "$hof_resS2") , $(extract_var_value "$hof_resS3") , $(extract_var_value "$hof_resU2") , $(extract_var_value "$hof_resU3") , $(extract_var_value "$big5_resS2") , $(extract_var_value "$big5_resS3") , $(extract_var_value "$big5_resU2") , $(extract_var_value "$big5_resU3") "
-#
-#
-### OpenAssistant
-## PVQ
-pvq_resS2=`python visualization_scripts/bar_viz.py results_neurips/results_nat_lang_prof_pvq_test_openassistant_rlhf2_llama30b_perm_50_System_msg_2nd_prs/* --save --filename neurips_plots/pvq_oa_50_S2`
-pvq_resS3=`python visualization_scripts/bar_viz.py results_neurips/results_nat_lang_prof_pvq_test_openassistant_rlhf2_llama30b_perm_50_System_msg_3rd_prs/* --save --filename neurips_plots/pvq_oa_50_S3`
-pvq_resU2=`python visualization_scripts/bar_viz.py results_neurips/results_nat_lang_prof_pvq_test_openassistant_rlhf2_llama30b_perm_50_User_msg_2nd_prs/* --save --filename neurips_plots/pvq_oa_50_U2`
-pvq_resU3=`python visualization_scripts/bar_viz.py results_neurips/results_nat_lang_prof_pvq_test_openassistant_rlhf2_llama30b_perm_50_User_msg_3rd_prs/* --save --filename neurips_plots/pvq_oa_50_U3`
-
-
-# Hofstede
-hof_resS2=`python visualization_scripts/bar_viz.py results_neurips/results_nat_lang_prof_hofstede_test_openassistant_rlhf2_llama30b_perm_50_System_msg_2nd_prs/* --save --filename neurips_plots/hof_oa_50_S2`
-hof_resS3=`python visualization_scripts/bar_viz.py results_neurips/results_nat_lang_prof_hofstede_test_openassistant_rlhf2_llama30b_perm_50_System_msg_3rd_prs/* --save --filename neurips_plots/hof_oa_50_S3`
-hof_resU2=`python visualization_scripts/bar_viz.py results_neurips/results_nat_lang_prof_hofstede_test_openassistant_rlhf2_llama30b_perm_50_User_msg_2nd_prs/* --save --filename neurips_plots/hof_oa_50_U2`
-hof_resU3=`python visualization_scripts/bar_viz.py results_neurips/results_nat_lang_prof_hofstede_test_openassistant_rlhf2_llama30b_perm_50_User_msg_3rd_prs/* --save --filename neurips_plots/hof_oa_50_U3`
-
-
-# Big5_50
-big5_resS2=`python visualization_scripts/bar_viz.py results_neurips/results_nat_lang_prof_big5_test_openassistant_rlhf2_llama30b_perm_50_System_msg_2nd_prs/* --save --filename neurips_plots/big5_oa_50_S2`
-big5_resS3=`python visualization_scripts/bar_viz.py results_neurips/results_nat_lang_prof_big5_test_openassistant_rlhf2_llama30b_perm_50_System_msg_3rd_prs/* --save --filename neurips_plots/big5_oa_50_S3`
-big5_resU2=`python visualization_scripts/bar_viz.py results_neurips/results_nat_lang_prof_big5_test_openassistant_rlhf2_llama30b_perm_50_User_msg_2nd_prs/* --save --filename neurips_plots/big5_oa_50_U2`
-big5_resU3=`python visualization_scripts/bar_viz.py results_neurips/results_nat_lang_prof_big5_test_openassistant_rlhf2_llama30b_perm_50_User_msg_3rd_prs/* --save --filename neurips_plots/big5_oa_50_U3`
-#
-#echo "OA  , $(extract_var_value "$pvq_resS2") , $(extract_var_value "$pvq_resS3") , $(extract_var_value "$pvq_resU2") , $(extract_var_value "$pvq_resU3") , $(extract_var_value "$hof_resS2") , $(extract_var_value "$hof_resS3") , $(extract_var_value "$hof_resU2") , $(extract_var_value "$hof_resU3") , $(extract_var_value "$big5_resS2") , $(extract_var_value "$big5_resS3") , $(extract_var_value "$big5_resU2") , $(extract_var_value "$big5_resU3") "
-#
-#### StableVicuna
-## PVQ
-pvq_resU2=`python visualization_scripts/bar_viz.py results_neurips/results_nat_lang_prof_pvq_test_stablevicuna_perm_50_User_msg_2nd_prs/* --save --filename neurips_plots/pvq_sv_50_U2`
-pvq_resU3=`python visualization_scripts/bar_viz.py results_neurips/results_nat_lang_prof_pvq_test_stablevicuna_perm_50_User_msg_3rd_prs/* --save --filename neurips_plots/pvq_sv_50_U3`
-
-# Hofstede
-hof_resU2=`python visualization_scripts/bar_viz.py results_neurips/results_nat_lang_prof_hofstede_test_stablevicuna_perm_50_User_msg_2nd_prs/* --save --filename neurips_plots/hof_sv_50_U2`
-hof_resU3=`python visualization_scripts/bar_viz.py results_neurips/results_nat_lang_prof_hofstede_test_stablevicuna_perm_50_User_msg_3rd_prs/* --save --filename neurips_plots/hof_sv_50_U3`
-
-# Big5_50
-big5_resU2=`python visualization_scripts/bar_viz.py results_neurips/results_nat_lang_prof_big5_test_stablevicuna_perm_50_User_msg_2nd_prs/* --save --filename neurips_plots/big5_sv_50_U2`
-big5_resU3=`python visualization_scripts/bar_viz.py results_neurips/results_nat_lang_prof_big5_test_stablevicuna_perm_50_User_msg_3rd_prs/* --save --filename neurips_plots/big5_sv_50_U3`
-#
-#echo "StVicuna  , n/a , n/a , $(extract_var_value "$pvq_resU2") , $(extract_var_value "$pvq_resU3") , n/a , n/a , $(extract_var_value "$hof_resU2") , $(extract_var_value "$hof_resU3") , n/a , n/a , $(extract_var_value "$big5_resU2") , $(extract_var_value "$big5_resU3") "
-#
-### StableLM
-## PVQ
-pvq_resS2=`python visualization_scripts/bar_viz.py results_neurips/results_nat_lang_prof_pvq_test_stablelm_perm_50_System_msg_2nd_prs/* --save --filename neurips_plots/pvq_slm_50_S2`
-pvq_resS3=`python visualization_scripts/bar_viz.py results_neurips/results_nat_lang_prof_pvq_test_stablelm_perm_50_System_msg_3rd_prs/* --save --filename neurips_plots/pvq_slm_50_S3`
-pvq_resU2=`python visualization_scripts/bar_viz.py results_neurips/results_nat_lang_prof_pvq_test_stablelm_perm_50_User_msg_2nd_prs/* --save --filename neurips_plots/pvq_slm_50_U2`
-pvq_resU3=`python visualization_scripts/bar_viz.py results_neurips/results_nat_lang_prof_pvq_test_stablelm_perm_50_User_msg_3rd_prs/* --save --filename neurips_plots/pvq_slm_50_U3`
-
-# Hofstede
-hof_resS2=`python visualization_scripts/bar_viz.py results_neurips/results_nat_lang_prof_hofstede_test_stablelm_perm_50_System_msg_2nd_prs/* --save --filename neurips_plots/hof_slm_50_S2`
-hof_resS3=`python visualization_scripts/bar_viz.py results_neurips/results_nat_lang_prof_hofstede_test_stablelm_perm_50_System_msg_3rd_prs/* --save --filename neurips_plots/hof_slm_50_S3`
-hof_resU2=`python visualization_scripts/bar_viz.py results_neurips/results_nat_lang_prof_hofstede_test_stablelm_perm_50_User_msg_2nd_prs/* --save --filename neurips_plots/hof_slm_50_U2`
-hof_resU3=`python visualization_scripts/bar_viz.py results_neurips/results_nat_lang_prof_hofstede_test_stablelm_perm_50_User_msg_3rd_prs/* --save --filename neurips_plots/hof_slm_50_U3`
-
-
-# Big5_50
-big5_resS2=`python visualization_scripts/bar_viz.py results_neurips/results_nat_lang_prof_big5_test_stablelm_perm_50_System_msg_2nd_prs/* --save --filename neurips_plots/big5_slm_50_S2`
-big5_resS3=`python visualization_scripts/bar_viz.py results_neurips/results_nat_lang_prof_big5_test_stablelm_perm_50_System_msg_3rd_prs/* --save --filename neurips_plots/big5_slm_50_S3`
-big5_resU2=`python visualization_scripts/bar_viz.py results_neurips/results_nat_lang_prof_big5_test_stablelm_perm_50_User_msg_2nd_prs/* --save --filename neurips_plots/big5_slm_50_U2`
-big5_resU3=`python visualization_scripts/bar_viz.py results_neurips/results_nat_lang_prof_big5_test_stablelm_perm_50_User_msg_3rd_prs/* --save --filename neurips_plots/big5_slm_50_U3`
-#
-#echo "StLM  , $(extract_var_value "$pvq_resS2") , $(extract_var_value "$pvq_resS3") , $(extract_var_value "$pvq_resU2") , $(extract_var_value "$pvq_resU3") , $(extract_var_value "$hof_resS2") , $(extract_var_value "$hof_resS3") , $(extract_var_value "$hof_resU2") , $(extract_var_value "$hof_resU3") , $(extract_var_value "$big5_resS2") , $(extract_var_value "$big5_resS3") , $(extract_var_value "$big5_resU2") , $(extract_var_value "$big5_resU3")"
-#
diff --git a/neurips_evaluations.sh b/neurips_evaluations.sh
deleted file mode 100644
index ed5a511..0000000
--- a/neurips_evaluations.sh
+++ /dev/null
@@ -1,331 +0,0 @@
-
-#bash autocrop visualizations/*_gpt35j_50*
-
-#python visualization_scripts/bar_viz.py results_neurips/results_lotr_pvq_test_gpt-4-0314_perm_1_System_msg_2nd_prs/* --save --filename draft/lotr_gpt4
-#
-#python visualization_scripts/bar_viz.py results_neurips/results_nat_lang_prof_pvq_test_gpt-4-0314_perm_1_System_msg_2nd_prs/* --save --filename draft/nat_lang_prof_gpt4
-#
-#python visualization_scripts/bar_viz.py results_neurips/results_AI_music_expert_pvq_test_gpt-4-0314_perm_1_System_msg_2nd_prs/* --save --filename draft/music_gpt4
-#
-#python visualization_scripts/bar_viz.py results_neurips/results_hobbies_pvq_test_gpt-4-0314_perm_1_System_msg_2nd_prs/* --save --filename draft/hobbies_gpt4
-#
-
-# Extracts the "Mean primary value alignment" score from bar_viz.py output.
-extract_value() {
-    local input_string="$1"
-    local value=$(echo "$input_string" | grep "Mean primary value alignment" | grep -o -E '[-]*[0-9]+\.[0-9]*')
-    echo "$value"
-}
-
-# Extracts the permutation-variance metric (mean over values/traits x
-# perspectives of the variance over question permutations, *10^3).
-extract_var_value() {
-    local input_string="$1"
-    local value=$(echo "$input_string" | grep "Permutation Var - mean (over values/traits x perspectives) of var (over perm) (\*10\^3)" | grep -o -E '[-]*[0-9]+\.[0-9]*')
-    echo "$value"
-}
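-
-# Illustrative usage (a minimal sketch, not part of the original pipeline; the
-# 'example' plot filename is hypothetical). Capture one bar_viz.py report and
-# pull both metrics out of it:
-#   res=`python visualization_scripts/bar_viz.py results_neurips/results_nat_lang_prof_pvq_test_gpt-3.5-turbo-0301_perm_50_System_msg_2nd_prs/* --save --filename neurips_plots/example`
-#   echo "alignment=$(extract_value "$res") perm_var=$(extract_var_value "$res")"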
-
-
-############################
-#### Experiment 3
-############################
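-# Each block below runs bar_viz.py over one results directory per
-# questionnaire (PVQ / Hofstede / Big5) and perspective (System vs. User
-# message, 2nd vs. 3rd person); the echo lines then assemble the extracted
-# scores into a LaTeX ('&'-separated) or CSV (','-separated) table row.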
-### GPT4_5
-#echo "\multicolumn{2}{l}{\textit{5 permutations}} \\\\"
-## PVQ
-#pvq_resS2=`python visualization_scripts/bar_viz.py results_neurips/results_nat_lang_prof_pvq_test_gpt-4-0314_perm_5_System_msg_2nd_prs/* --save --filename neurips_plots/pvq_gpt4_5_S2`
-#pvq_resS3=`python visualization_scripts/bar_viz.py results_neurips/results_nat_lang_prof_pvq_test_gpt-4-0314_perm_5_System_msg_3rd_prs/* --save --filename neurips_plots/pvq_gpt4_5_S3`
-#pvq_resU2=`python visualization_scripts/bar_viz.py results_neurips/results_nat_lang_prof_pvq_test_gpt-4-0314_perm_5_User_msg_2nd_prs/* --save --filename neurips_plots/pvq_gpt4_5_U2`
-#pvq_resU3=`python visualization_scripts/bar_viz.py results_neurips/results_nat_lang_prof_pvq_test_gpt-4-0314_perm_5_User_msg_3rd_prs/* --save --filename neurips_plots/pvq_gpt4_5_U3`
-#
-## Hofstede
-#hof_resS2=`python visualization_scripts/bar_viz.py results_neurips/results_nat_lang_prof_hofstede_test_gpt-4-0314_perm_5_System_msg_2nd_prs/* --save --filename neurips_plots/hof_gpt4_5_S2`
-#hof_resS3=`python visualization_scripts/bar_viz.py results_neurips/results_nat_lang_prof_hofstede_test_gpt-4-0314_perm_5_System_msg_3rd_prs/* --save --filename neurips_plots/hof_gpt4_5_S3`
-#hof_resU2=`python visualization_scripts/bar_viz.py results_neurips/results_nat_lang_prof_hofstede_test_gpt-4-0314_perm_5_User_msg_2nd_prs/* --save --filename neurips_plots/hof_gpt4_5_U2`
-#hof_resU3=`python visualization_scripts/bar_viz.py results_neurips/results_nat_lang_prof_hofstede_test_gpt-4-0314_perm_5_User_msg_3rd_prs/* --save --filename neurips_plots/hof_gpt4_5_U3`
-#
-#
-## Big5_5
-#big5_resS2=`python visualization_scripts/bar_viz.py results_neurips/results_nat_lang_prof_big5_test_gpt-4-0314_perm_5_System_msg_2nd_prs/* --save --filename neurips_plots/big5_gpt4_5_S2`
-#big5_resS3=`python visualization_scripts/bar_viz.py results_neurips/results_nat_lang_prof_big5_test_gpt-4-0314_perm_5_System_msg_3rd_prs/* --save --filename neurips_plots/big5_gpt4_5_S3`
-#big5_resU2=`python visualization_scripts/bar_viz.py results_neurips/results_nat_lang_prof_big5_test_gpt-4-0314_perm_5_User_msg_2nd_prs/* --save --filename neurips_plots/big5_gpt4_5_U2`
-#big5_resU3=`python visualization_scripts/bar_viz.py results_neurips/results_nat_lang_prof_big5_test_gpt-4-0314_perm_5_User_msg_3rd_prs/* --save --filename neurips_plots/big5_gpt4_5_U3`
-#
-#echo "GPT-4 & $(extract_value "$pvq_resS2") & $(extract_value "$pvq_resS3") & $(extract_value "$pvq_resU2") & $(extract_value "$pvq_resU3") & $(extract_value "$hof_resS2") & $(extract_value "$hof_resS3") & $(extract_value "$hof_resU2") & $(extract_value "$hof_resU3") & $(extract_value "$big5_resS2") & $(extract_value "$big5_resS3") & $(extract_value "$big5_resU2") & $(extract_value "$big5_resU3") \\\\"
-
-### GPT35_5
-## PVQ
-#pvq_resS2=`python visualization_scripts/bar_viz.py results_neurips/results_nat_lang_prof_pvq_test_gpt-3.5-turbo-0301_perm_5_System_msg_2nd_prs/* --save --filename neurips_plots/pvq_gpt35_5_S2`
-#pvq_resS3=`python visualization_scripts/bar_viz.py results_neurips/results_nat_lang_prof_pvq_test_gpt-3.5-turbo-0301_perm_5_System_msg_3rd_prs/* --save --filename neurips_plots/pvq_gpt35_5_S3`
-#pvq_resU2=`python visualization_scripts/bar_viz.py results_neurips/results_nat_lang_prof_pvq_test_gpt-3.5-turbo-0301_perm_5_User_msg_2nd_prs/* --save --filename neurips_plots/pvq_gpt35_5_U2`
-#pvq_resU3=`python visualization_scripts/bar_viz.py results_neurips/results_nat_lang_prof_pvq_test_gpt-3.5-turbo-0301_perm_5_User_msg_3rd_prs/* --save --filename neurips_plots/pvq_gpt35_5_U3`
-#
-## Hofstede
-#hof_resS2=`python visualization_scripts/bar_viz.py results_neurips/results_nat_lang_prof_hofstede_test_gpt-3.5-turbo-0301_perm_5_System_msg_2nd_prs/* --save --filename neurips_plots/hof_gpt35_5_S2`
-#hof_resS3=`python visualization_scripts/bar_viz.py results_neurips/results_nat_lang_prof_hofstede_test_gpt-3.5-turbo-0301_perm_5_System_msg_3rd_prs/* --save --filename neurips_plots/hof_gpt35_5_S3`
-#hof_resU2=`python visualization_scripts/bar_viz.py results_neurips/results_nat_lang_prof_hofstede_test_gpt-3.5-turbo-0301_perm_5_User_msg_2nd_prs/* --save --filename neurips_plots/hof_gpt35_5_U2`
-#hof_resU3=`python visualization_scripts/bar_viz.py results_neurips/results_nat_lang_prof_hofstede_test_gpt-3.5-turbo-0301_perm_5_User_msg_3rd_prs/* --save --filename neurips_plots/hof_gpt35_5_U3`
-#
-## Big5_5
-#big5_resS2=`python visualization_scripts/bar_viz.py results_neurips/results_nat_lang_prof_big5_test_gpt-3.5-turbo-0301_perm_5_System_msg_2nd_prs/* --save --filename neurips_plots/big5_gpt35_5_S2`
-#big5_resS3=`python visualization_scripts/bar_viz.py results_neurips/results_nat_lang_prof_big5_test_gpt-3.5-turbo-0301_perm_5_System_msg_3rd_prs/* --save --filename neurips_plots/big5_gpt35_5_S3`
-#big5_resU2=`python visualization_scripts/bar_viz.py results_neurips/results_nat_lang_prof_big5_test_gpt-3.5-turbo-0301_perm_5_User_msg_2nd_prs/* --save --filename neurips_plots/big5_gpt35_5_U2`
-#big5_resU3=`python visualization_scripts/bar_viz.py results_neurips/results_nat_lang_prof_big5_test_gpt-3.5-turbo-0301_perm_5_User_msg_3rd_prs/* --save --filename neurips_plots/big5_gpt35_5_U3`
-#
-#echo "GPT-3.5 & $(extract_value "$pvq_resS2") & $(extract_value "$pvq_resS3") & $(extract_value "$pvq_resU2") & $(extract_value "$pvq_resU3") & $(extract_value "$hof_resS2") & $(extract_value "$hof_resS3") & $(extract_value "$hof_resU2") & $(extract_value "$hof_resU3") & $(extract_value "$big5_resS2") & $(extract_value "$big5_resS3") & $(extract_value "$big5_resU2") & $(extract_value "$big5_resU3") \\\\"
-
-## Zephyr-7B-beta_50
-# PVQ
-pvq_resS2=`python visualization_scripts/bar_viz.py results_neurips/results_nat_lang_prof_pvq_test_zephyr-7b-beta_perm_50_System_msg_2nd_prs/* --save --filename neurips_plots/pvq_zep_50_S2`
-pvq_resS3=`python visualization_scripts/bar_viz.py results_neurips/results_nat_lang_prof_pvq_test_zephyr-7b-beta_perm_50_System_msg_3rd_prs/* --save --filename neurips_plots/pvq_zep_50_S3`
-pvq_resU2=`python visualization_scripts/bar_viz.py results_neurips/results_nat_lang_prof_pvq_test_zephyr-7b-beta_perm_50_User_msg_2nd_prs/* --save --filename neurips_plots/pvq_zep_50_U2`
-pvq_resU3=`python visualization_scripts/bar_viz.py results_neurips/results_nat_lang_prof_pvq_test_zephyr-7b-beta_perm_50_User_msg_3rd_prs/* --save --filename neurips_plots/pvq_zep_50_U3`
-
-# Hofstede
-hof_resS2=`python visualization_scripts/bar_viz.py results_neurips/results_nat_lang_prof_hofstede_test_zephyr-7b-beta_perm_50_System_msg_2nd_prs/* --save --filename neurips_plots/hof_zep_50_S2`
-hof_resS3=`python visualization_scripts/bar_viz.py results_neurips/results_nat_lang_prof_hofstede_test_zephyr-7b-beta_perm_50_System_msg_3rd_prs/* --save --filename neurips_plots/hof_zep_50_S3`
-hof_resU2=`python visualization_scripts/bar_viz.py results_neurips/results_nat_lang_prof_hofstede_test_zephyr-7b-beta_perm_50_User_msg_2nd_prs/* --save --filename neurips_plots/hof_zep_50_U2`
-hof_resU3=`python visualization_scripts/bar_viz.py results_neurips/results_nat_lang_prof_hofstede_test_zephyr-7b-beta_perm_50_User_msg_3rd_prs/* --save --filename neurips_plots/hof_zep_50_U3`
-
-# Big5_50
-big5_resS2=`python visualization_scripts/bar_viz.py results_neurips/results_nat_lang_prof_big5_test_zephyr-7b-beta_perm_50_System_msg_2nd_prs/* --save --filename neurips_plots/big5_zep_50_S2`
-big5_resS3=`python visualization_scripts/bar_viz.py results_neurips/results_nat_lang_prof_big5_test_zephyr-7b-beta_perm_50_System_msg_3rd_prs/* --save --filename neurips_plots/big5_zep_50_S3`
-big5_resU2=`python visualization_scripts/bar_viz.py results_neurips/results_nat_lang_prof_big5_test_zephyr-7b-beta_perm_50_User_msg_2nd_prs/* --save --filename neurips_plots/big5_zep_50_U2`
-big5_resU3=`python visualization_scripts/bar_viz.py results_neurips/results_nat_lang_prof_big5_test_zephyr-7b-beta_perm_50_User_msg_3rd_prs/* --save --filename neurips_plots/big5_zep_50_U3`
-
-echo "Zepyr-7B-beta & $(extract_value "$pvq_resS2") & $(extract_value "$pvq_resS3") & $(extract_value "$pvq_resU2") & $(extract_value "$pvq_resU3") & $(extract_value "$hof_resS2") & $(extract_value "$hof_resS3") & $(extract_value "$hof_resU2") & $(extract_value "$hof_resU3") & $(extract_value "$big5_resS2") & $(extract_value "$big5_resS3") & $(extract_value "$big5_resU2") & $(extract_value "$big5_resU3") \\\\"
-exit
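-# NOTE: the 'exit' above stops the script here; the blocks below are kept for
-# reference and do not run unless it is removed.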
-
-## GPT-3.5-turbo-instruct-0914
-# PVQ
-pvq_resU2=`python visualization_scripts/bar_viz.py results_neurips/results_nat_lang_prof_pvq_test_gpt-3.5-turbo-instruct-0914_perm_50_User_msg_2nd_prs/* --save --filename neurips_plots/pvq_gpt35in_50_U2`
-pvq_resU3=`python visualization_scripts/bar_viz.py results_neurips/results_nat_lang_prof_pvq_test_gpt-3.5-turbo-instruct-0914_perm_50_User_msg_3rd_prs/* --save --filename neurips_plots/pvq_gpt35in_50_U3`
-
-# Hofstede
-hof_resU2=`python visualization_scripts/bar_viz.py results_neurips/results_nat_lang_prof_hofstede_test_gpt-3.5-turbo-instruct-0914_perm_50_User_msg_2nd_prs/* --save --filename neurips_plots/hof_gpt35in_50_U2`
-hof_resU3=`python visualization_scripts/bar_viz.py results_neurips/results_nat_lang_prof_hofstede_test_gpt-3.5-turbo-instruct-0914_perm_50_User_msg_3rd_prs/* --save --filename neurips_plots/hof_gpt35in_50_U3`
-
-
-# Big5_50
-big5_resU2=`python visualization_scripts/bar_viz.py results_neurips/results_nat_lang_prof_big5_test_gpt-3.5-turbo-instruct-0914_perm_50_User_msg_2nd_prs/* --save --filename neurips_plots/big5_gpt35in_50_U2`
-big5_resU3=`python visualization_scripts/bar_viz.py results_neurips/results_nat_lang_prof_big5_test_gpt-3.5-turbo-instruct-0914_perm_50_User_msg_3rd_prs/* --save --filename neurips_plots/big5_gpt35in_50_U3`
-
-echo "GPT-instruct  & $(extract_value "$pvq_resS2") & $(extract_value "$pvq_resS3") & $(extract_value "$pvq_resU2") & $(extract_value "$pvq_resU3") & $(extract_value "$hof_resS2") & $(extract_value "$hof_resS3") & $(extract_value "$hof_resU2") & $(extract_value "$hof_resU3") & $(extract_value "$big5_resS2") & $(extract_value "$big5_resS3") & $(extract_value "$big5_resU2") & $(extract_value "$big5_resU3") \\\\"
-
-#echo "\multicolumn{2}{l}{\textit{50 permutations}} \\\\"
-## GPT35_50 JUNE
-# PVQ
-pvq_resS2=`python visualization_scripts/bar_viz.py results_neurips/results_nat_lang_prof_pvq_test_gpt-3.5-turbo-0613_perm_50_System_msg_2nd_prs/* --save --filename neurips_plots/pvq_gpt35j_50_S2`
-#pvq_resS3=`python visualization_scripts/bar_viz.py results_neurips/results_nat_lang_prof_pvq_test_gpt-3.5-turbo-0613_perm_50_System_msg_3rd_prs/* --save --filename neurips_plots/pvq_gpt35j_50_S3`
-#pvq_resU2=`python visualization_scripts/bar_viz.py results_neurips/results_nat_lang_prof_pvq_test_gpt-3.5-turbo-0613_perm_50_User_msg_2nd_prs/* --save --filename neurips_plots/pvq_gpt35j_50_U2`
-#pvq_resU3=`python visualization_scripts/bar_viz.py results_neurips/results_nat_lang_prof_pvq_test_gpt-3.5-turbo-0613_perm_50_User_msg_3rd_prs/* --save --filename neurips_plots/pvq_gpt35j_50_U3`
-
-# Hofstede
-#hof_resS2=`python visualization_scripts/bar_viz.py results_neurips/results_nat_lang_prof_hofstede_test_gpt-3.5-turbo-0613_perm_50_System_msg_2nd_prs/* --save --filename neurips_plots/hof_gpt35j_50_S2`
-hof_resS3=`python visualization_scripts/bar_viz.py results_neurips/results_nat_lang_prof_hofstede_test_gpt-3.5-turbo-0613_perm_50_System_msg_3rd_prs/* --save --filename neurips_plots/hof_gpt35j_50_S3`
-#hof_resU2=`python visualization_scripts/bar_viz.py results_neurips/results_nat_lang_prof_hofstede_test_gpt-3.5-turbo-0613_perm_50_User_msg_2nd_prs/* --save --filename neurips_plots/hof_gpt35j_50_U2`
-#hof_resU3=`python visualization_scripts/bar_viz.py results_neurips/results_nat_lang_prof_hofstede_test_gpt-3.5-turbo-0613_perm_50_User_msg_3rd_prs/* --save --filename neurips_plots/hof_gpt35j_50_U3`
-
-
-# Big5_50
-#big5_resS2=`python visualization_scripts/bar_viz.py results_neurips/results_nat_lang_prof_big5_test_gpt-3.5-turbo-0613_perm_50_System_msg_2nd_prs/* --save --filename neurips_plots/big5_gpt35j_50_S2`
-big5_resS3=`python visualization_scripts/bar_viz.py results_neurips/results_nat_lang_prof_big5_test_gpt-3.5-turbo-0613_perm_50_System_msg_3rd_prs/* --save --filename neurips_plots/big5_gpt35j_50_S3`
-#big5_resU2=`python visualization_scripts/bar_viz.py results_neurips/results_nat_lang_prof_big5_test_gpt-3.5-turbo-0613_perm_50_User_msg_2nd_prs/* --save --filename neurips_plots/big5_gpt35j_50_U2`
-#big5_resU3=`python visualization_scripts/bar_viz.py results_neurips/results_nat_lang_prof_big5_test_gpt-3.5-turbo-0613_perm_50_User_msg_3rd_prs/* --save --filename neurips_plots/big5_gpt35j_50_U3`
-
-#echo "GPT-3.5-june  & $(extract_value "$pvq_resS2") & $(extract_value "$pvq_resS3") & $(extract_value "$pvq_resU2") & $(extract_value "$pvq_resU3") & $(extract_value "$hof_resS2") & $(extract_value "$hof_resS3") & $(extract_value "$hof_resU2") & $(extract_value "$hof_resU3") & $(extract_value "$big5_resS2") & $(extract_value "$big5_resS3") & $(extract_value "$big5_resU2") & $(extract_value "$big5_resU3") \\\\"
-
-
-## GPT35m_50
-# PVQ
-pvq_resS2=`python visualization_scripts/bar_viz.py results_neurips/results_nat_lang_prof_pvq_test_gpt-3.5-turbo-0301_perm_50_System_msg_2nd_prs/* --save --filename neurips_plots/pvq_gpt35m_50_S2`
-#pvq_resS3=`python visualization_scripts/bar_viz.py results_neurips/results_nat_lang_prof_pvq_test_gpt-3.5-turbo-0301_perm_50_System_msg_3rd_prs/* --save --filename neurips_plots/pvq_gpt35m_50_S3`
-#pvq_resU2=`python visualization_scripts/bar_viz.py results_neurips/results_nat_lang_prof_pvq_test_gpt-3.5-turbo-0301_perm_50_User_msg_2nd_prs/* --save --filename neurips_plots/pvq_gpt35m_50_U2`
-#pvq_resU3=`python visualization_scripts/bar_viz.py results_neurips/results_nat_lang_prof_pvq_test_gpt-3.5-turbo-0301_perm_50_User_msg_3rd_prs/* --save --filename neurips_plots/pvq_gpt35m_50_U3`
-
-# Hofstede
-#hof_resS2=`python visualization_scripts/bar_viz.py results_neurips/results_nat_lang_prof_hofstede_test_gpt-3.5-turbo-0301_perm_50_System_msg_2nd_prs/* --save --filename neurips_plots/hof_gpt35m_50_S2`
-#hof_resS3=`python visualization_scripts/bar_viz.py results_neurips/results_nat_lang_prof_hofstede_test_gpt-3.5-turbo-0301_perm_50_System_msg_3rd_prs/* --save --filename neurips_plots/hof_gpt35m_50_S3`
-hof_resU2=`python visualization_scripts/bar_viz.py results_neurips/results_nat_lang_prof_hofstede_test_gpt-3.5-turbo-0301_perm_50_User_msg_2nd_prs/* --save --filename neurips_plots/hof_gpt35m_50_U2`
-#hof_resU3=`python visualization_scripts/bar_viz.py results_neurips/results_nat_lang_prof_hofstede_test_gpt-3.5-turbo-0301_perm_50_User_msg_3rd_prs/* --save --filename neurips_plots/hof_gpt35m_50_U3`
-
-
-# Big5_50
-#big5_resS2=`python visualization_scripts/bar_viz.py results_neurips/results_nat_lang_prof_big5_test_gpt-3.5-turbo-0301_perm_50_System_msg_2nd_prs/* --save --filename neurips_plots/big5_gpt35m_50_S2`
-#big5_resS3=`python visualization_scripts/bar_viz.py results_neurips/results_nat_lang_prof_big5_test_gpt-3.5-turbo-0301_perm_50_System_msg_3rd_prs/* --save --filename neurips_plots/big5_gpt35m_50_S3`
-big5_resU2=`python visualization_scripts/bar_viz.py results_neurips/results_nat_lang_prof_big5_test_gpt-3.5-turbo-0301_perm_50_User_msg_2nd_prs/* --save --filename neurips_plots/big5_gpt35m_50_U2`
-#big5_resU3=`python visualization_scripts/bar_viz.py results_neurips/results_nat_lang_prof_big5_test_gpt-3.5-turbo-0301_perm_50_User_msg_3rd_prs/* --save --filename neurips_plots/big5_gpt35m_50_U3`
-
-#echo "GPT-3.5  & $(extract_value "$pvq_resS2") & $(extract_value "$pvq_resS3") & $(extract_value "$pvq_resU2") & $(extract_value "$pvq_resU3") & $(extract_value "$hof_resS2") & $(extract_value "$hof_resS3") & $(extract_value "$hof_resU2") & $(extract_value "$hof_resU3") & $(extract_value "$big5_resS2") & $(extract_value "$big5_resS3") & $(extract_value "$big5_resU2") & $(extract_value "$big5_resU3") \\\\"
-
-#
-### OpenAssistant
-## PVQ
-#pvq_resS2=`python visualization_scripts/bar_viz.py results_neurips/results_nat_lang_prof_pvq_test_openassistant_rlhf2_llama30b_perm_50_System_msg_2nd_prs/* --save --filename neurips_plots/pvq_oa_50_S2`
-#pvq_resS3=`python visualization_scripts/bar_viz.py results_neurips/results_nat_lang_prof_pvq_test_openassistant_rlhf2_llama30b_perm_50_System_msg_3rd_prs/* --save --filename neurips_plots/pvq_oa_50_S3`
-#pvq_resU2=`python visualization_scripts/bar_viz.py results_neurips/results_nat_lang_prof_pvq_test_openassistant_rlhf2_llama30b_perm_50_User_msg_2nd_prs/* --save --filename neurips_plots/pvq_oa_50_U2`
-#pvq_resU3=`python visualization_scripts/bar_viz.py results_neurips/results_nat_lang_prof_pvq_test_openassistant_rlhf2_llama30b_perm_50_User_msg_3rd_prs/* --save --filename neurips_plots/pvq_oa_50_U3`
-#
-## Hofstede
-#hof_resS2=`python visualization_scripts/bar_viz.py results_neurips/results_nat_lang_prof_hofstede_test_openassistant_rlhf2_llama30b_perm_50_System_msg_2nd_prs/* --save --filename neurips_plots/hof_oa_50_S2`
-#hof_resS3=`python visualization_scripts/bar_viz.py results_neurips/results_nat_lang_prof_hofstede_test_openassistant_rlhf2_llama30b_perm_50_System_msg_3rd_prs/* --save --filename neurips_plots/hof_oa_50_S3`
-#hof_resU2=`python visualization_scripts/bar_viz.py results_neurips/results_nat_lang_prof_hofstede_test_openassistant_rlhf2_llama30b_perm_50_User_msg_2nd_prs/* --save --filename neurips_plots/hof_oa_50_U2`
-#hof_resU3=`python visualization_scripts/bar_viz.py results_neurips/results_nat_lang_prof_hofstede_test_openassistant_rlhf2_llama30b_perm_50_User_msg_3rd_prs/* --save --filename neurips_plots/hof_oa_50_U3`
-#
-## Big5_50
-#big5_resS2=`python visualization_scripts/bar_viz.py results_neurips/results_nat_lang_prof_big5_test_openassistant_rlhf2_llama30b_perm_50_System_msg_2nd_prs/* --save --filename neurips_plots/big5_oa_50_S2`
-#big5_resS3=`python visualization_scripts/bar_viz.py results_neurips/results_nat_lang_prof_big5_test_openassistant_rlhf2_llama30b_perm_50_System_msg_3rd_prs/* --save --filename neurips_plots/big5_oa_50_S3`
-#big5_resU2=`python visualization_scripts/bar_viz.py results_neurips/results_nat_lang_prof_big5_test_openassistant_rlhf2_llama30b_perm_50_User_msg_2nd_prs/* --save --filename neurips_plots/big5_oa_50_U2`
-#big5_resU3=`python visualization_scripts/bar_viz.py results_neurips/results_nat_lang_prof_big5_test_openassistant_rlhf2_llama30b_perm_50_User_msg_3rd_prs/* --save --filename neurips_plots/big5_oa_50_U3`
-#
-#echo "OA  & $(extract_value "$pvq_resS2") & $(extract_value "$pvq_resS3") & $(extract_value "$pvq_resU2") & $(extract_value "$pvq_resU3") & $(extract_value "$hof_resS2") & $(extract_value "$hof_resS3") & $(extract_value "$hof_resU2") & $(extract_value "$hof_resU3") & $(extract_value "$big5_resS2") & $(extract_value "$big5_resS3") & $(extract_value "$big5_resU2") & $(extract_value "$big5_resU3") \\\\"
-#
-#### StableVicuna
-## PVQ
-#pvq_resU2=`python visualization_scripts/bar_viz.py results_neurips/results_nat_lang_prof_pvq_test_stablevicuna_perm_50_User_msg_2nd_prs/* --save --filename neurips_plots/pvq_sv_50_U2`
-#pvq_resU3=`python visualization_scripts/bar_viz.py results_neurips/results_nat_lang_prof_pvq_test_stablevicuna_perm_50_User_msg_3rd_prs/* --save --filename neurips_plots/pvq_sv_50_U3`
-#
-## Hofstede
-#hof_resU2=`python visualization_scripts/bar_viz.py results_neurips/results_nat_lang_prof_hofstede_test_stablevicuna_perm_50_User_msg_2nd_prs/* --save --filename neurips_plots/hof_sv_50_U2`
-#hof_resU3=`python visualization_scripts/bar_viz.py results_neurips/results_nat_lang_prof_hofstede_test_stablevicuna_perm_50_User_msg_3rd_prs/* --save --filename neurips_plots/hof_sv_50_U3`
-#
-## Big5_50
-#big5_resU2=`python visualization_scripts/bar_viz.py results_neurips/results_nat_lang_prof_big5_test_stablevicuna_perm_50_User_msg_2nd_prs/* --save --filename neurips_plots/big5_sv_50_U2`
-#big5_resU3=`python visualization_scripts/bar_viz.py results_neurips/results_nat_lang_prof_big5_test_stablevicuna_perm_50_User_msg_3rd_prs/* --save --filename neurips_plots/big5_sv_50_U3`
-#
-#echo "StVicuna  & n/a & n/a & $(extract_value "$pvq_resU2") & $(extract_value "$pvq_resU3") & n/a & n/a & $(extract_value "$hof_resU2") & $(extract_value "$hof_resU3") & n/a & n/a & $(extract_value "$big5_resU2") & $(extract_value "$big5_resU3") \\\\"
-#
-### StableLM
-## PVQ
-#pvq_resS2=`python visualization_scripts/bar_viz.py results_neurips/results_nat_lang_prof_pvq_test_stablelm_perm_50_System_msg_2nd_prs/* --save --filename neurips_plots/pvq_slm_50_S2`
-#pvq_resS3=`python visualization_scripts/bar_viz.py results_neurips/results_nat_lang_prof_pvq_test_stablelm_perm_50_System_msg_3rd_prs/* --save --filename neurips_plots/pvq_slm_50_S3`
-#pvq_resU2=`python visualization_scripts/bar_viz.py results_neurips/results_nat_lang_prof_pvq_test_stablelm_perm_50_User_msg_2nd_prs/* --save --filename neurips_plots/pvq_slm_50_U2`
-#pvq_resU3=`python visualization_scripts/bar_viz.py results_neurips/results_nat_lang_prof_pvq_test_stablelm_perm_50_User_msg_3rd_prs/* --save --filename neurips_plots/pvq_slm_50_U3`
-#
-## Hofstede
-#hof_resS2=`python visualization_scripts/bar_viz.py results_neurips/results_nat_lang_prof_hofstede_test_stablelm_perm_50_System_msg_2nd_prs/* --save --filename neurips_plots/hof_slm_50_S2`
-#hof_resS3=`python visualization_scripts/bar_viz.py results_neurips/results_nat_lang_prof_hofstede_test_stablelm_perm_50_System_msg_3rd_prs/* --save --filename neurips_plots/hof_slm_50_S3`
-#hof_resU2=`python visualization_scripts/bar_viz.py results_neurips/results_nat_lang_prof_hofstede_test_stablelm_perm_50_User_msg_2nd_prs/* --save --filename neurips_plots/hof_slm_50_U2`
-#hof_resU3=`python visualization_scripts/bar_viz.py results_neurips/results_nat_lang_prof_hofstede_test_stablelm_perm_50_User_msg_3rd_prs/* --save --filename neurips_plots/hof_slm_50_U3`
-#
-#
-## Big5_50
-#big5_resS2=`python visualization_scripts/bar_viz.py results_neurips/results_nat_lang_prof_big5_test_stablelm_perm_50_System_msg_2nd_prs/* --save --filename neurips_plots/big5_slm_50_S2`
-#big5_resS3=`python visualization_scripts/bar_viz.py results_neurips/results_nat_lang_prof_big5_test_stablelm_perm_50_System_msg_3rd_prs/* --save --filename neurips_plots/big5_slm_50_S3`
-#big5_resU2=`python visualization_scripts/bar_viz.py results_neurips/results_nat_lang_prof_big5_test_stablelm_perm_50_User_msg_2nd_prs/* --save --filename neurips_plots/big5_slm_50_U2`
-#big5_resU3=`python visualization_scripts/bar_viz.py results_neurips/results_nat_lang_prof_big5_test_stablelm_perm_50_User_msg_3rd_prs/* --save --filename neurips_plots/big5_slm_50_U3`
-#
-#echo "StLM  & $(extract_value "$pvq_resS2") & $(extract_value "$pvq_resS3") & $(extract_value "$pvq_resU2") & $(extract_value "$pvq_resU3") & $(extract_value "$hof_resS2") & $(extract_value "$hof_resS3") & $(extract_value "$hof_resU2") & $(extract_value "$hof_resU3") & $(extract_value "$big5_resS2") & $(extract_value "$big5_resS3") & $(extract_value "$big5_resU2") & $(extract_value "$big5_resU3") \\\\"
-#
-#### LLaMa-65B
-## PVQ
-#pvq_resU2=`python visualization_scripts/bar_viz.py results_neurips/results_nat_lang_prof_pvq_test_llama_65B_perm_50__msg_2nd_prs/* --save --filename neurips_plots/pvq_llama_50_U2`
-#pvq_resU3=`python visualization_scripts/bar_viz.py results_neurips/results_nat_lang_prof_pvq_test_llama_65B_perm_50__msg_3rd_prs/* --save --filename neurips_plots/pvq_llama_50_U3`
-#
-## Hofstede
-#hof_resU2=`python visualization_scripts/bar_viz.py results_neurips/results_nat_lang_prof_hof_test_llama_65B_perm_50__msg_2nd_prs/* --save --filename neurips_plots/hof_llama_50_U2`
-#hof_resU3=`python visualization_scripts/bar_viz.py results_neurips/results_nat_lang_prof_hof_test_llama_65B_perm_50__msg_3rd_prs/* --save --filename neurips_plots/hof_llama_50_U3`
-#
-## Big5_50
-#big5_resU2=`python visualization_scripts/bar_viz.py results_neurips/results_nat_lang_prof_big5_test_llama_65B_perm_50__msg_2nd_prs/* --save --filename neurips_plots/big5_llama_50_U2`
-#big5_resU3=`python visualization_scripts/bar_viz.py results_neurips/results_nat_lang_prof_big5_test_llama_65B_perm_50__msg_3rd_prs/* --save --filename neurips_plots/big5_llama_50_U3`
-#
-#echo "LLaMa-65B  & n/a & n/a & $(extract_value "$pvq_resU2") & $(extract_value "$pvq_resU3") & n/a & n/a & $(extract_value "$hof_resU2") & $(extract_value "$hof_resU3") & n/a & n/a & $(extract_value "$big5_resU2") & $(extract_value "$big5_resU3") \\\\"
-
-
-
-##############################
-###### Experiment 4
-##############################
-#
-#echo "\multicolumn{2}{l}{\textit{50 permutations}} \\\\"
-### GPT35_50
-## PVQ
-#pvq_m=`python visualization_scripts/bar_viz.py results_neurips/results_nat_lang_prof_pvq_test_gpt-3.5-turbo-0301_perm_50_System_msg_2nd_prs_intensity_slight/* --save --filename neurips_plots/pvq_gpt35_S2_slight`
-#pvq_h=`python visualization_scripts/bar_viz.py results_neurips/results_nat_lang_prof_pvq_test_gpt-3.5-turbo-0301_perm_50_System_msg_2nd_prs_intensity_more/* --save --filename neurips_plots/pvq_gpt35_S2_more`
-#pvq_eh=`python visualization_scripts/bar_viz.py results_neurips/results_nat_lang_prof_pvq_test_gpt-3.5-turbo-0301_perm_50_System_msg_2nd_prs/* --save --filename neurips_plots/pvq_gpt35_S2`
-#
-## Hofstede
-#hof_m=`python visualization_scripts/bar_viz.py results_neurips/results_nat_lang_prof_hofstede_test_gpt-3.5-turbo-0301_perm_50_User_msg_2nd_prs_intensity_slight/* --save --filename neurips_plots/hof_gpt35_U2_slight`
-#hof_h=`python visualization_scripts/bar_viz.py results_neurips/results_nat_lang_prof_hofstede_test_gpt-3.5-turbo-0301_perm_50_User_msg_2nd_prs_intensity_more/* --save --filename neurips_plots/hof_gpt35_U2_more`
-#hof_eh=`python visualization_scripts/bar_viz.py results_neurips/results_nat_lang_prof_hofstede_test_gpt-3.5-turbo-0301_perm_50_User_msg_2nd_prs/* --save --filename neurips_plots/hof_gpt35_U2`
-#
-## Big5_50
-#big5_m=`python visualization_scripts/bar_viz.py results_neurips/results_nat_lang_prof_big5_test_gpt-3.5-turbo-0301_perm_50_User_msg_2nd_prs_intensity_slight/* --save --filename neurips_plots/big5_gpt35_U2_slight`
-#big5_h=`python visualization_scripts/bar_viz.py results_neurips/results_nat_lang_prof_big5_test_gpt-3.5-turbo-0301_perm_50_User_msg_2nd_prs_intensity_more/* --save --filename neurips_plots/big5_gpt35_U2_more`
-#big5_eh=`python visualization_scripts/bar_viz.py results_neurips/results_nat_lang_prof_big5_test_gpt-3.5-turbo-0301_perm_50_User_msg_2nd_prs/* --save --filename neurips_plots/big5_gpt35_U2`
-#
-#echo "GPT-3.5 & $(extract_value "$pvq_m") & $(extract_value "$pvq_h") & $(extract_value "$pvq_eh") & $(extract_value "$hof_m") & $(extract_value "$hof_h") & $(extract_value "$hof_eh") & $(extract_value "$big5_m") & $(extract_value "$big5_h") & $(extract_value "$big5_eh") \\\\"
-#
-### OpenAssistant
-## PVQ
-#pvq_m=`python visualization_scripts/bar_viz.py results_neurips/results_nat_lang_prof_pvq_test_openassistant_rlhf2_llama30b_perm_50_User_msg_2nd_prs_intensity_slight/* --save --filename neurips_plots/pvq_oa_U2_slight`
-#pvq_h=`python visualization_scripts/bar_viz.py results_neurips/results_nat_lang_prof_pvq_test_openassistant_rlhf2_llama30b_perm_50_User_msg_2nd_prs_intensity_more/* --save --filename neurips_plots/pvq_oa_U2_more`
-#pvq_eh=`python visualization_scripts/bar_viz.py results_neurips/results_nat_lang_prof_pvq_test_openassistant_rlhf2_llama30b_perm_50_User_msg_2nd_prs/* --save --filename neurips_plots/pvq_oa_U2`
-#
-## Hofstede
-#hof_m=`python visualization_scripts/bar_viz.py results_neurips/results_nat_lang_prof_hofstede_test_openassistant_rlhf2_llama30b_perm_50_User_msg_3rd_prs_intensity_slight/* --save --filename neurips_plots/hof_gpt35_U3`
-#hof_h=`python visualization_scripts/bar_viz.py results_neurips/results_nat_lang_prof_hofstede_test_openassistant_rlhf2_llama30b_perm_50_User_msg_3rd_prs_intensity_more/* --save --filename neurips_plots/hof_gpt35_U3`
-#hof_eh=`python visualization_scripts/bar_viz.py results_neurips/results_nat_lang_prof_hofstede_test_openassistant_rlhf2_llama30b_perm_50_User_msg_3rd_prs/* --save --filename neurips_plots/hof_gpt35_U3`
-#
-## Big5_5
-#big5_m=`python visualization_scripts/bar_viz.py results_neurips/results_nat_lang_prof_big5_test_openassistant_rlhf2_llama30b_perm_50_User_msg_3rd_prs_intensity_slight/* --save --filename neurips_plots/big5_gpt35_U3`
-#big5_h=`python visualization_scripts/bar_viz.py results_neurips/results_nat_lang_prof_big5_test_openassistant_rlhf2_llama30b_perm_50_User_msg_3rd_prs_intensity_more/* --save --filename neurips_plots/big5_gpt35_U3`
-#big5_eh=`python visualization_scripts/bar_viz.py results_neurips/results_nat_lang_prof_big5_test_openassistant_rlhf2_llama30b_perm_50_User_msg_3rd_prs/* --save --filename neurips_plots/big5_gpt35_U3`
-#
-#echo "OA & $(extract_value "$pvq_m") & $(extract_value "$pvq_h") & $(extract_value "$pvq_eh") & $(extract_value "$hof_m") & $(extract_value "$hof_h") & $(extract_value "$hof_eh") & $(extract_value "$big5_m") & $(extract_value "$big5_h") & $(extract_value "$big5_eh") \\\\"
-#
-#### StableVicuna
-## PVQ
-#pvq_m=`python visualization_scripts/bar_viz.py results_neurips/results_nat_lang_prof_pvq_test_stablevicuna_perm_50_User_msg_2nd_prs_intensity_slight/* --save --filename neurips_plots/pvq_gpt35_U2`
-#pvq_h=`python visualization_scripts/bar_viz.py results_neurips/results_nat_lang_prof_pvq_test_stablevicuna_perm_50_User_msg_2nd_prs_intensity_more/* --save --filename neurips_plots/pvq_gpt35_U2`
-#pvq_eh=`python visualization_scripts/bar_viz.py results_neurips/results_nat_lang_prof_pvq_test_stablevicuna_perm_50_User_msg_2nd_prs/* --save --filename neurips_plots/pvq_gpt35_U2`
-#
-## Hofstede
-#hof_m=`python visualization_scripts/bar_viz.py results_neurips/results_nat_lang_prof_hofstede_test_stablevicuna_perm_50_User_msg_3rd_prs_intensity_slight/* --save --filename neurips_plots/hof_gpt35_U3`
-#hof_h=`python visualization_scripts/bar_viz.py results_neurips/results_nat_lang_prof_hofstede_test_stablevicuna_perm_50_User_msg_3rd_prs_intensity_more/* --save --filename neurips_plots/hof_gpt35_U3`
-#hof_eh=`python visualization_scripts/bar_viz.py results_neurips/results_nat_lang_prof_hofstede_test_stablevicuna_perm_50_User_msg_3rd_prs/* --save --filename neurips_plots/hof_gpt35_U3`
-#
-## Big5_5
-#big5_m=`python visualization_scripts/bar_viz.py results_neurips/results_nat_lang_prof_big5_test_stablevicuna_perm_50_User_msg_3rd_prs_intensity_slight/* --save --filename neurips_plots/big5_gpt35_U3`
-#big5_h=`python visualization_scripts/bar_viz.py results_neurips/results_nat_lang_prof_big5_test_stablevicuna_perm_50_User_msg_3rd_prs_intensity_more/* --save --filename neurips_plots/big5_gpt35_U3`
-#big5_eh=`python visualization_scripts/bar_viz.py results_neurips/results_nat_lang_prof_big5_test_stablevicuna_perm_50_User_msg_3rd_prs/* --save --filename neurips_plots/big5_gpt35_U3`
-#
-#echo "StVicuna & $(extract_value "$pvq_m") & $(extract_value "$pvq_h") & $(extract_value "$pvq_eh") & $(extract_value "$hof_m") & $(extract_value "$hof_h") & $(extract_value "$hof_eh") & $(extract_value "$big5_m") & $(extract_value "$big5_h") & $(extract_value "$big5_eh") \\\\"
-#
-### StableLM
-## PVQ
-#pvq_m=`python visualization_scripts/bar_viz.py results_neurips/results_nat_lang_prof_pvq_test_stablelm_perm_50_User_msg_2nd_prs_intensity_slight/* --save --filename neurips_plots/pvq_gpt35_U2`
-#pvq_h=`python visualization_scripts/bar_viz.py results_neurips/results_nat_lang_prof_pvq_test_stablelm_perm_50_User_msg_2nd_prs_intensity_more/* --save --filename neurips_plots/pvq_gpt35_U2`
-#pvq_eh=`python visualization_scripts/bar_viz.py results_neurips/results_nat_lang_prof_pvq_test_stablelm_perm_50_User_msg_2nd_prs/* --save --filename neurips_plots/pvq_gpt35_U2`
-#
-## Hofstede
-#hof_m=`python visualization_scripts/bar_viz.py results_neurips/results_nat_lang_prof_hofstede_test_stablelm_perm_50_System_msg_3rd_prs_intensity_slight/* --save --filename neurips_plots/hof_gpt35_S3`
-#hof_h=`python visualization_scripts/bar_viz.py results_neurips/results_nat_lang_prof_hofstede_test_stablelm_perm_50_System_msg_3rd_prs_intensity_more/* --save --filename neurips_plots/hof_gpt35_S3`
-#hof_eh=`python visualization_scripts/bar_viz.py results_neurips/results_nat_lang_prof_hofstede_test_stablelm_perm_50_System_msg_3rd_prs/* --save --filename neurips_plots/hof_gpt35_S3`
-#
-## Big5_5
-#big5_m=`python visualization_scripts/bar_viz.py results_neurips/results_nat_lang_prof_big5_test_stablelm_perm_50_User_msg_2nd_prs_intensity_slight/* --save --filename neurips_plots/big5_gpt35_U2`
-#big5_h=`python visualization_scripts/bar_viz.py results_neurips/results_nat_lang_prof_big5_test_stablelm_perm_50_User_msg_2nd_prs_intensity_more/* --save --filename neurips_plots/big5_gpt35_U2`
-#big5_eh=`python visualization_scripts/bar_viz.py results_neurips/results_nat_lang_prof_big5_test_stablelm_perm_50_User_msg_2nd_prs/* --save --filename neurips_plots/big5_gpt35_U2`
-#
-#echo "StLM & $(extract_value "$pvq_m") & $(extract_value "$pvq_h") & $(extract_value "$pvq_eh") & $(extract_value "$hof_m") & $(extract_value "$hof_h") & $(extract_value "$hof_eh") & $(extract_value "$big5_m") & $(extract_value "$big5_h") & $(extract_value "$big5_eh") \\\\"
diff --git a/parse_global_opinions_dataset.py b/parse_global_opinions_dataset.py
new file mode 100644
index 0000000..21b4a55
--- /dev/null
+++ b/parse_global_opinions_dataset.py
@@ -0,0 +1,41 @@
+from datasets import load_dataset
+import pandas as pd
+
+dataset = load_dataset("Anthropic/llm_global_opinions", split="train")
+wvs_dataset = dataset.filter(lambda ex: ex['source'] == "WVS")
+
+# from IPython import embed; embed()  # interactive debug shell (disabled so the script runs end-to-end)
+card_1 = wvs_dataset.filter(lambda ex: str(ex['question']).startswith("For each of the following aspects, indicate how important it is in your life. Would you say it is very important, rather important, not very important or not important at all"))
+card_2 = wvs_dataset.filter(lambda ex: str(ex['question']).startswith("Here is a list of qualities that children can be encouraged to learn at home. Which, if any, do you consider to be especially important? Please choose up to five."))
+# missing from 2: religious faith
+# keys = list(card_2.features)
+keys = ['question', 'selections', 'options', 'source']
+new_element = pd.Series({
+    'question': "Here is a list of qualities that children can be encouraged to learn at home. Which, if any, do you consider to be especially important? Please choose up to five.",
+    'selections': None,
+    'options': ...,
+    'source': "WVS",
+})
+
+card_3_1 = wvs_dataset.filter(lambda ex: str(ex['question']).startswith("On this list are various groups of people. Could you please mention any that you would not like to have as neighbors?"))
+
+card_3_2 = wvs_dataset.filter(lambda ex: str(ex['question']).startswith("For each of the following statements I read out, can you tell me how much you agree with each. Do you agree strongly, agree, disagree, or disagree strongly?"))
+# missing from 3_2
+# One of my main goals in life has been to make my parents proud
+# On the whole, men make better business executives than women do
+
+card_3_3 = wvs_dataset.filter(lambda ex: str(ex['question']).startswith(""))
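+# NOTE: startswith("") matches every row; kept as a placeholder for the questions listed below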
+# 3_3 missing
+# How would you feel about the following statements? Do you agree or disagree with them?
+# When jobs are scarce, employers should give priority to people of this country over immigrants
+# If a woman earns more money than her husband, it's almost certain to cause problems
+# Homosexual couples are as good parents as other couples
+# It is a duty towards society to have children
+# Adult children have the duty to provide long-term care for their parents
+# People who don’t work turn lazy
+# Work is a duty towards society
+# Work should always come first, even if it means less spare time
+
+card_4_1 = wvs_dataset.filter(lambda ex: str(ex['question']).startswith("On this card are three basic kinds of attitudes concerning the society we live in."))
+card_4_2 = wvs_dataset.filter(lambda ex: str(ex['question']).startswith("I'm going to read out a list of various changes in our way of life that might take place in the near future"))
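+# Quick sanity check (a minimal sketch): print how many rows each filter above matched.
+for name, card in [("card_1", card_1), ("card_2", card_2),
+                   ("card_3_1", card_3_1), ("card_3_2", card_3_2),
+                   ("card_3_3", card_3_3), ("card_4_1", card_4_1),
+                   ("card_4_2", card_4_2)]:
+    print(f"{name}: {card.num_rows} matching questions")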
+
diff --git a/parse_res_json.py b/parse_res_json.py
index 3874af8..f59671a 100644
--- a/parse_res_json.py
+++ b/parse_res_json.py
@@ -62,7 +62,6 @@ def process_json(json_path, show_values=False):
 
                 # print(" ".join([f"{val.replace('Acceptance Rate ','')}:{sc:.2f}" for val, sc in metr[test_set].items()]))
 
-    # from IPython import embed; embed();
 
 def load_json_file(file_path):
     try:
@@ -76,7 +75,6 @@ def load_json_file(file_path):
     except Exception as e:
         print(f"An error occurred: {e}")
         
-        
 
 def find_most_common_element(lst):
     if not lst:
@@ -87,7 +85,6 @@ def find_most_common_element(lst):
     return most_common
 
 
-
 # Function to load a JSON file
 def load_json_file(file_path):
     try:
diff --git a/personas/tolkien_characters/tolkien_characters_races.txt b/personas/tolkien_characters/tolkien_characters_races.txt
new file mode 100644
index 0000000..4be7a31
--- /dev/null
+++ b/personas/tolkien_characters/tolkien_characters_races.txt
@@ -0,0 +1,60 @@
+gg
+bg
+ho
+hu
+bg
+bg
+el
+el
+gg
+el
+ho
+el
+el
+el
+gg
+ho
+bg
+bg
+hu
+hu
+ho
+el
+hu
+hu
+hu
+el
+gg
+el
+hu
+hu
+bg
+bg
+el
+el
+ho
+gg
+bg
+gg
+hu
+hu
+hu
+el
+hu
+dw
+ho
+el
+hu
+el
+el
+el
+hu
+hu
+el
+el
+el
+bg
+bg
+gg
+bg
+dw
\ No newline at end of file
diff --git a/price_estimate.py b/price_estimate.py
deleted file mode 100644
index 1fbe80b..0000000
--- a/price_estimate.py
+++ /dev/null
@@ -1,89 +0,0 @@
-# PVQ (lotr) - one eval:
-# total GPT tokens used: 5200
-#         gpt-4 ~ 0.2080 dollars
-#         gpt-3.5 ~ 0.0104 dollars
-#         davinci ~ 0.1040 dollars
-#         curie ~ 0.0104 dollars
-#         babagge ~ 0.0026 dollars
-#         ada ~ 0.0021 dollars
-
-# PVQ (high detail) - one eval:
-# total GPT tokens used: 7840
-#         gpt-4 ~ 0.3136 dollars
-#         gpt-3.5 ~ 0.0157 dollars
-#         davinci ~ 0.1568 dollars
-#         curie ~ 0.0157 dollars
-#         babagge ~ 0.0039 dollars
-#         ada ~ 0.0031 dollars
-
-
-# one eval
-
-# n_tokens_per_persp = 5200
-# gpt_4 = 0.208
-# gpt_35 = 0.01
-# davinci = 0.1
-
-# n_tokens_per_persp = 7840
-# gpt_4 = 0.3136
-# gpt_35 = 0.0157
-# davinci = 0.1568
-
-# n_tokens_per_persp = 5560
-# gpt_4 = 0.2224
-# gpt_35 = 0.0111
-# davinci = 0.1112
-
-# price per token
-gpt_4 = 0.03/1000
-gpt_35 = 0.0015/1000
-davinci = 0.02/1000
-
-
-# 1. PVQ: lotr 5 + prim 0  -> 5 ( prim is in 3.)
-# exp1 = 0
-exp1 = 5 * 4640 + 4 * 4640
-
-# conversations
-# 230 tokens is the longest conversation
-exp_conv = 5 * (4640 + 40*230)
-
-# 2. PVQ:music 6 +  hobbies: 5 -> 11
-# exp2 = 0
-exp2 = 6 * 4600 + 5 * 4520
-
-# 3. message person: (PVQ: 4 HOF: 6 B5: 5) x 4 settings = 15*4 -> 60
-pvq_3 = 4*4*5040*1
-hof_3 = 4*6*2200*0
-big5_3 = 4*5*3500*0  # 50 items
-big5_100_3 = 4*5*7083*0  # 100 items
-
-
-exp3 = pvq_3 + hof_3 + big5_3 + big5_100_3
-
-# 4. smooth: (PVQ: 4 HOF: 6 B5: 5) x 2 settings = 15*2 -> 30 (one is covered in 3.)
-# pvq = 2* 4 * 5320  # 2nd system
-pvq_4 = 2*4*5760*0
-hof_4 = 2*6*2200*0
-big5_4 = 2*5*3500*0  # 50 items
-big5_100_4 = 2*5*7083*0  # 100 items
-exp4 = pvq_4 + hof_4 + big5_4 + big5_100_4
-
-
-n_permutations = 50
-print("n_permutations:", n_permutations)
-
-total_persp_x_tokens = sum([exp1, exp2, exp3, exp4])
-
-total_persp_x_tokens = exp1
-# total_persp_x_tokens = exp1 + exp2
-# total_persp_x_tokens = exp1
-
-total_tokens = n_permutations * total_persp_x_tokens
-
-print("total tokens:", total_tokens)
-
-price_gpt4 = gpt_4 * total_tokens
-price_gpt35 = gpt_35 * total_tokens
-price_davinci = davinci * total_tokens
-print(f"Total price:\n\tGPT4: {price_gpt4}\n\tGPT35: {price_gpt35}\n\tDavinci: {price_davinci}")
\ No newline at end of file
diff --git a/run_campaign_sim_conv_no_pop.sh b/run_campaign_sim_conv_no_pop.sh
index 42ce237..dfd6528 100644
--- a/run_campaign_sim_conv_no_pop.sh
+++ b/run_campaign_sim_conv_no_pop.sh
@@ -1,16 +1,19 @@
 #!/bin/bash
 #SBATCH -A imi@a100
 #SBATCH -C a100
-#SBATCH --time=03:59:59
-#SBATCH --gres=gpu:1
-#SBATCH --array=0-24 # themes x n_msg -> 5x5
+#SBATCH --time=01:59:59
+#SBATCH --gres=gpu:4
+##SBATCH --array=0-24 # themes x n_msg -> 5x5 (no default profile, only contexts)
+#SBATCH --array=29-29 # just no conversation, default profile
 #SBATCH -o slurm_logs/sb_log_%A_%a.out
 #SBATCH -e slurm_logs/sb_log_%A_%a.err
 ##SBATCH --qos=qos_gpu-dev
 
-#####################################################
-### Simulated Conversations
-#####################################################
+## PVQ
+test_tag="pvq"
+experiment_name="pvq_test"
+data_dir="data_pvq"
+population_type="permutations"
 
 # extract theme and n_msgs
 n_msgs_list=(9 7 5 3 1) # 5
@@ -23,20 +26,27 @@ themes=(
   "poem"
   "history"
   "chess"
+  "None"
 )
 themes_len=${#themes[@]}
 
+#SLURM_ARRAY_TASK_ID=25  # debug override (disabled); use the id provided by the SBATCH array
 echo "ID:"$SLURM_ARRAY_TASK_ID
 
+
 theme_i=$(( SLURM_ARRAY_TASK_ID / $n_msgs_len ))
-msgs_i=$(( SLURM_ARRAY_TASK_ID % $themes_len ))
+msgs_i=$(( SLURM_ARRAY_TASK_ID % $n_msgs_len ))
 
-echo "Theme_i:"$theme_i
-echo "Msg_i:"$msgs_i
+#  echo "Theme_i:"$theme_i
+#  echo "Msg_i:"$msgs_i
 
 theme="${themes[$theme_i]}"
 n_msgs="${n_msgs_list[$msgs_i]}"
 
+echo "Theme:"$theme
+echo "Msg:"$n_msgs
+
+
 
 permute_options_seed=$theme_i
 
@@ -73,11 +83,6 @@ all_engines=(
 engine="${all_engines[$1]}"
 
 
-## PVQ
-test_tag="pvq"
-experiment_name="pvq_test"
-data_dir="data_pvq"
-population_type="permutations"
 
 
 echo "Evaluation:$engine:$theme:$permute_options_seed:$n_msgs"
@@ -91,7 +96,6 @@ mkdir -p $LOG_DIR
 
 source $HOME/.bashrc
 
-#PY='/gpfsscratch/rech/imi/utu57ed/miniconda3/envs/llm_persp/bin/python'
 conda activate llm_persp
 
 
diff --git a/run_campaign_sim_conv_pvq_msgs.sh b/run_campaign_sim_conv_pvq_msgs.sh
index bddfe80..e452b41 100644
--- a/run_campaign_sim_conv_pvq_msgs.sh
+++ b/run_campaign_sim_conv_pvq_msgs.sh
@@ -8,8 +8,22 @@
 #SBATCH -e slurm_logs/sb_log_%A_%a.err
 ##SBATCH --qos=qos_gpu-dev
 
-#####################################################
-### Simulated Conversations
+##########################################################
+# Set the questionnaire and population (uncomment the corresponding 4 lines)
+##########################################################
+
+## PVQ - tolkien characters
+test_tag="pvq"
+experiment_name="pvq_test"
+data_dir="data_pvq"
+population_type="tolkien_characters"
+
+### PVQ - real world persona
+#test_tag="pvq"
+#experiment_name="pvq_test"
+#data_dir="data_pvq"
+#population_type="famous_people"
+
 #####################################################
 
 # extract theme n_msgs and seed
@@ -74,13 +88,6 @@ all_engines=(
 engine="${all_engines[$1]}"
 
 
-## PVQ
-test_tag="pvq"
-experiment_name="pvq_test"
-data_dir="data_pvq"
-population_type="tolkien_characters"
-#population_type="famous_people"
-
 
 
 echo "Evaluation:$engine:$theme:$permute_options_seed:$n_msgs"
@@ -95,7 +102,6 @@ mkdir -p $LOG_DIR
 
 source $HOME/.bashrc
 
-#PY='/gpfsscratch/rech/imi/utu57ed/miniconda3/envs/llm_persp/bin/python'
 conda activate llm_persp
 
 
diff --git a/run_campaign_sim_conv_pvq_seeds.sh b/run_campaign_sim_conv_pvq_seeds.sh
index c57bb74..3828f30 100644
--- a/run_campaign_sim_conv_pvq_seeds.sh
+++ b/run_campaign_sim_conv_pvq_seeds.sh
@@ -3,14 +3,37 @@
 #SBATCH -C a100
 #SBATCH --time=03:59:59
 #SBATCH --gres=gpu:1
-#SBATCH --array=0-29 # themes x n_msg -> 6x5
+#SBATCH --array=0-29 # themes x n_seeds -> 6x5
 #SBATCH -o slurm_logs/sb_log_%A_%a.out
 #SBATCH -e slurm_logs/sb_log_%A_%a.err
 ##SBATCH --qos=qos_gpu-dev
 
+
+##########################################################
+# Set the questionnaire and population (uncomment the corresponding 4 lines)
+##########################################################
+
+## PVQ - tolkien characters
+test_tag="pvq"
+experiment_name="pvq_test"
+data_dir="data_pvq"
+population_type="tolkien_characters"
+
+### PVQ - real world persona
+#test_tag="pvq"
+#experiment_name="pvq_test"
+#data_dir="data_pvq"
+#population_type="famous_people"
+
+## Donation - tolkien characters
+#test_tag="tolkien_donation"
+#experiment_name="tolkien_donation_test"
+#data_dir="data_tolkien_donation"
+#population_type="tolkien_characters"
+
 #####################################################
-### Simulated Conversations
-#####################################################
+
+
 
 # extract theme and n_msgs
 seed_list=(1 3 5 7 9)
@@ -43,6 +66,8 @@ echo "Theme:"$theme
 echo "Seed_i:"$seed_i
 echo "Seed:"$seed
 
+#exit  # debug early-exit (disabled so the evaluation below actually runs)
+
 n_msgs=3
 
 permute_options_seed="$seed"_"$theme_i"
@@ -70,36 +95,12 @@ all_engines=(
   "Qwen-7B"
   "Qwen-72B-Chat"
   "dummy"
-#  "gpt-3.5-turbo-0301"
-#  "gpt-3.5-turbo-0613"
-#  "gpt-3.5-turbo-1106"
-#  "gpt-3.5-turbo-instruct-0914"
 )
 
 # Select engine based on provided index
 engine="${all_engines[$1]}"
 
 
-## PVQ
-#test_tag="pvq"
-#experiment_name="pvq_test"
-#data_dir="data_pvq"
-##population_type="tolkien_characters"
-#population_type="famous_people"
-
-## Tolkien DON
-#test_tag="tolkien_donation"
-#experiment_name="tolkien_donation_test"
-#data_dir="data_tolkien_donation"
-#population_type="tolkien_characters"
-
-# Tolkien DON
-test_tag="wvs_svas"
-experiment_name="wvs_svas_test"
-data_dir="data_wvs_svas"
-population_type="tolkien_characters"
-
-
 echo "Evaluation:$engine:$theme:$permute_options_seed:$n_msgs"
 
 SUBDIR="sim_conv_"$test_tag"_"$population_type"_seeds/"$engine"/"$seed"_seed/results_sim_conv_"$population_type"_"$engine"_msgs_"$n_msgs
@@ -111,7 +112,6 @@ mkdir -p $LOG_DIR
 
 source $HOME/.bashrc
 
-#PY='/gpfsscratch/rech/imi/utu57ed/miniconda3/envs/llm_persp/bin/python'
 conda activate llm_persp
 
 
diff --git a/run_campaign_sim_conv_pvq_seeds_NO_SYSTEM.sh b/run_campaign_sim_conv_pvq_seeds_NO_SYSTEM.sh
index b797e8a..03a67b3 100644
--- a/run_campaign_sim_conv_pvq_seeds_NO_SYSTEM.sh
+++ b/run_campaign_sim_conv_pvq_seeds_NO_SYSTEM.sh
@@ -8,8 +8,28 @@
 #SBATCH -e slurm_logs/sb_log_%A_%a.err
 ##SBATCH --qos=qos_gpu-dev
 
-#####################################################
-### Simulated Conversations
+##########################################################
+# Set the questionnaire and population (uncomment the corresponding 4 lines)
+##########################################################
+
+## PVQ - tolkien characters
+test_tag="pvq"
+experiment_name="pvq_test"
+data_dir="data_pvq"
+population_type="tolkien_characters"
+
+### PVQ - real world persona
+#test_tag="pvq"
+#experiment_name="pvq_test"
+#data_dir="data_pvq"
+#population_type="famous_people"
+
+## Donation - tolkien characters
+#test_tag="tolkien_donation"
+#experiment_name="tolkien_donation_test"
+#data_dir="data_tolkien_donation"
+#population_type="tolkien_characters"
+
 #####################################################
 
 # extract theme and n_msgs
@@ -58,19 +78,6 @@ all_engines=(
 engine="${all_engines[$1]}"
 
 
-## PVQ
-test_tag="pvq"
-experiment_name="pvq_test"
-data_dir="data_pvq"
-population_type="tolkien_characters"
-#population_type="famous_people"
-
-## Tolkien DON
-#test_tag="tolkien_donation"
-#experiment_name="tolkien_donation_test"
-#data_dir="data_tolkien_donation"
-#population_type="tolkien_characters"
-
 
 echo "Evaluation:$engine:$theme:$permute_options_seed:$n_msgs"
 
@@ -83,7 +90,6 @@ mkdir -p $LOG_DIR
 
 source $HOME/.bashrc
 
-#PY='/gpfsscratch/rech/imi/utu57ed/miniconda3/envs/llm_persp/bin/python'
 conda activate llm_persp
 
 
diff --git a/run_dummy.sh b/run_dummy.sh
index b357021..80e961d 100644
--- a/run_dummy.sh
+++ b/run_dummy.sh
@@ -25,15 +25,29 @@ engines=(
 
 for engine in "${engines[@]}"; do
 
-#--simulated-population-type permutations \
-#--permutations 50 \
 
 
+# Tolkien characters
+#--simulated-population-type tolkien_characters \
+
+# Real world personas
 #--simulated-population-type famous_people \
 
+# No personas
 #--simulated-population-type permutations \
 #--permutations 50 \
 
+
+# Questionnaire
+# PVQ
+#--data_dir data/data_pvq \
+#--experiment_name pvq_test \
+
+# Tolkien donation
+#--data_dir data/data_tolkien_donation \
+#--experiment_name tolkien_donation_test \
+
+
 python -u evaluate.py \
 --simulated-population-type tolkien_characters \
 --simulate-conversation-theme "chess" \
@@ -45,8 +59,8 @@ python -u evaluate.py \
 --save_dir results/test/test \
 --engine $engine \
 --query-in-reply \
---data_dir data/data_wvs_svas \
---experiment_name wvs_svas_test \
+--data_dir data/data_pvq \
+--experiment_name pvq_test \
 --pvq-version "pvq_auto" \
 --no-profile \
 --direct-perspective \
@@ -57,20 +71,3 @@ python -u evaluate.py \
 
 done
 
-#--data_dir data/data_wvs_svas \
-#--experiment_name wvs_svas_test \
-
-#--data_dir data/data_pvq \
-#--experiment_name pvq_test \
-
-#--data_dir data/data_tolkien_donation \
-#--experiment_name tolkien_donation_test \
-
-#--data_dir data/data_tolkien_ultimatum \
-#--experiment_name tolkien_ultimatum_test \
-
-#--data_dir data/data_regular_ultimatum \
-#--experiment_name regular_ultimatum_test \
-
-#--data_dir data/data_tolkien_good_bad \
-#--experiment_name tolkien_good_bad_test \
diff --git a/run_plosone_pvq.sh b/run_plosone_pvq.sh
deleted file mode 100644
index 9fa7c01..0000000
--- a/run_plosone_pvq.sh
+++ /dev/null
@@ -1,179 +0,0 @@
-#!/bin/bash
-
-#####################################################
-### Formats
-#####################################################
-#
-##ENGINE="gpt-4-0314"
-#
-##ENGINE="gpt-3.5-turbo-0301"
-##ENGINE="gpt-3.5-turbo-0613"
-##ENGINE="gpt-3.5-turbo-1106"
-##ENGINE="gpt-3.5-turbo-instruct-0914"
-#
-##ENGINE="llama_2_70b_chat"
-##ENGINE="llama_2_13b_chat"
-##ENGINE="llama_2_70b"
-#
-##ENGINE="zephyr-7b-beta"
-#
-#
-#population_types=(
-##  "user_personas"
-##  "llm_personas"
-##  "famous_people"
-##  "lotr_characters"
-#  "tolkien_characters"
-#)
-#
-#format_options=(
-##  "chat"
-##  "code_cpp"
-##  "code_py"
-#  "conf_toml"
-#  "latex"
-#)
-#
-#for population_type in "${population_types[@]}"; do
-#for format in "${format_options[@]}"; do
-#
-#SAVE_DIR="results_stability/results_pvq_test_format_"$population_type"_"$ENGINE
-#mkdir -p $SAVE_DIR
-#
-#
-#
-#python -u evaluate.py \
-#--simulated-population-type $population_type \
-#--save_dir $SAVE_DIR \
-#--engine "$ENGINE" \
-#--query-in-reply \
-#--system-message \
-#--data_dir data_pvq \
-#--experiment_name pvq_test \
-#--ntrain 0 \
-#--format $format \
-#--no-profile \
-#--verbose  2>&1 | tee -a $SAVE_DIR/log_$format.txt
-#
-#done
-#done
-
-#
-######################################################
-#### Themes (Simulated Conversations)
-######################################################
-#
-##ENGINE="gpt-4-0314"
-#
-##ENGINE="gpt-3.5-turbo-0301"
-##ENGINE="gpt-3.5-turbo-0613"
-##ENGINE="gpt-2.5-turbo-1106"
-##ENGINE="gpt-3.5-turbo-instruct-0914"
-#
-##ENGINE="llama_2_70b_chat"
-##ENGINE="zephyr-7b-beta"
-#
-#ENGINE="Mixtral-8x7B-Instruct-v0.1-4b"
-##ENGINE="Mistral-7B-Instruct-v0.2"
-#
-#population_types=(
-##  "famous_people"
-#  "tolkien_characters"
-#)
-#
-#themes=(
-##  "None"
-#  "grammar"
-#  "joke"
-#  "poem"
-#  "history"
-#  "chess"
-#)
-#
-#ENGINE="Mixtral-8x7B-Instruct-v0.1-4b"
-#
-#for theme in "${themes[@]}"; do
-#for population_type in "${population_types[@]}"; do
-#
-#SAVE_DIR="results_stability/results_pvq_test_sim_conv_1sent_"$population_type"_"$ENGINE
-#
-#mkdir -p $SAVE_DIR
-#
-#
-## conversation
-#python -u evaluate.py \
-#--simulated-population-type $population_type \
-#--save_dir $SAVE_DIR \
-#--engine "$ENGINE" \
-#--query-in-reply \
-#--data_dir data_pvq \
-#--experiment_name pvq_test \
-#--ntrain 0 \
-#--format chat \
-#--simulate-conversation-theme $theme \
-#--no-profile \
-#--verbose  2>&1 | tee -a $SAVE_DIR/log_$theme.txt
-#
-##--system-message \
-#
-#done
-#done
-
-#####################################################
-### Weather
-#####################################################
-
-#ENGINE="gpt-4-0314"
-
-#ENGINE="gpt-3.5-turbo-0301"
-#ENGINE="gpt-3.5-turbo-0613"
-#ENGINE="gpt-3.5-turbo-1106"
-#ENGINE="gpt-3.5-turbo-instruct-0914"
-
-#ENGINE="llama_2_70b_chat"
-#ENGINE="zephyr-7b-beta"
-ENGINE="Mixtral-8x7B-v0.1-4b"
-
-
-population_types=(
-#  "user_personas"
-#  "llm_personas"
-#  "famous_people"
-#  "lotr_characters"
-  "tolkien_characters"
-)
-
-weathers=(
-#  "rain"
-#  "sun"
-#  "snow"
-#  "thunderstorm"
-#  "sandstorm"
-  "blizzard"
-)
-
-for weather in "${weathers[@]}"; do
-for population_type in "${population_types[@]}"; do
-
-SAVE_DIR="results_weather/test_weather_"$ENGINE
-mkdir -p $SAVE_DIR
-
-
-#--system-message \
-
-# conversation
-python -u evaluate.py \
---simulated-population-type $population_type \
---save_dir $SAVE_DIR \
---engine "$ENGINE" \
---query-in-reply \
---data_dir data_pvq \
---experiment_name pvq_test \
---ntrain 0 \
---format chat \
---weather $weather \
---no-profile \
---verbose  2>&1 | tee -a $SAVE_DIR/log_$theme.txt
-
-done
-done
diff --git a/run_single.sh b/run_single.sh
new file mode 100644
index 0000000..17f515a
--- /dev/null
+++ b/run_single.sh
@@ -0,0 +1,268 @@
+#!/bin/bash
+
+# 6 themes
+themes=(
+  "grammar"
+  "joke"
+  "poem"
+  "history"
+  "chess"
+  "None"
+)
+
+#################################
+# 1. Select the theme index [0-5]
+#################################
+
+theme_i=0
+theme="${themes[$theme_i]}"
+
+##################
+## 2. Set the seed
+##################
+seed="1"
+
+######################################
+## 3. Set the number of simulated messages
+######################################
+n_msgs=3
+
+
+permute_options_seed="$seed"_"$theme_i"
+
+#################################################
+## 4. Select the llm (uncomment the one you want)
+#################################################
+
+#engine="llama_2_7b"
+#engine="llama_2_13b"
+#engine="llama_2_7b_chat"
+#engine="llama_2_13b_chat"
+#engine="zephyr-7b-beta"
+#engine="Mistral-7B-v0.1"
+#engine="Mistral-7B-Instruct-v0.1"
+#engine="Mistral-7B-Instruct-v0.2"
+#engine="llama_2_70b" # 2 gpu
+#engine="llama_2_70b_chat" # 2 gpu
+#engine="Mixtral-8x7B-v0.1-4b" # 4h
+#engine="Mixtral-8x7B-Instruct-v0.1-4b" # 4h
+#engine="Mixtral-8x7B-v0.1"
+#engine="Mixtral-8x7B-Instruct-v0.1"
+#engine="phi-2"
+#engine="phi-1"
+#engine="phi-1.5"
+#engine="Qwen-72B"
+#engine="Qwen-14B"
+#engine="Qwen-7B"
+#engine="Qwen-72B-Chat"
+engine="dummy"
+
+
+
+
+###########################################################
+## 5. Questionnaire (uncomment the corresponding two lines)
+###########################################################
+
+# PVQ
+test_tag="pvq"
+data_dir="data_pvq"
+experiment_name="pvq_test"
+
+# Tolkien donation
+#test_tag="tolkien_donation"
+#data_dir="data_tolkien_donation"
+#experiment_name="tolkien_donation_test"
+
+
+#######################################################
+## 6. Simulated population (uncomment the one you want)
+#######################################################
+population_type="tolkien_characters"
+#population_type="famous_people"
+
+#######################################################
+## 7. Set experiment name
+#######################################################
+exp_name="test_experiment"
+
+
+echo "Your experiment:"
+echo "1. Theme:"$theme
+echo "2. Seed:"$seed
+echo "3. N messages:"$n_msgs
+echo "4. LLM:"$engine
+echo "5. Questionnaire:"$test_tag
+echo "6. Population:"$population_type
+echo "7. Experiment name:"$exp_name
+
+
+### Executing the evaluation
+
+SUBDIR=$exp_name"_"$test_tag"_"$population_type"_seeds/"$engine"/"$seed"_seed/results_sim_conv_"$population_type"_"$engine"_msgs_"$n_msgs
+SAVE_DIR="results/"$SUBDIR
+LOG_DIR="logs/"$SUBDIR
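+
+# With the defaults above these resolve to
+#   results/test_experiment_pvq_tolkien_characters_seeds/dummy/1_seed/results_sim_conv_tolkien_characters_dummy_msgs_3
+# and the matching path under logs/.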
+
+mkdir -p $SAVE_DIR
+mkdir -p $LOG_DIR
+
+source $HOME/.bashrc
+
+conda activate llm_persp
+
+
+if [[ $engine == *"Mistral"* ]] || [[ $engine == *"Mixtral"* ]]; then
+
+  echo "Mistral or Mixtral: $engine"
+
+  if [[ $engine == *"Instruct"* ]] ; then
+    # INSTRUCT MODELS
+
+    # mistral, mixtral -> no sys; query
+    python -u evaluate.py \
+      --simulated-population-type $population_type \
+      --simulate-conversation-theme $theme \
+      --simulated-human-knows-persona \
+      --simulated-conversation-n-messages $n_msgs \
+      --permute-options \
+      --permute-options-seed "$permute_options_seed" \
+      --format chat \
+      --save_dir $SAVE_DIR \
+      --engine "$engine" \
+      --query-in-reply \
+      --data_dir data/$data_dir \
+      --experiment_name $experiment_name \
+      --pvq-version "pvq_auto" \
+      --no-profile \
+      --assert-params \
+      --verbose  2>&1 | tee -a $LOG_DIR/log_$permute_options_seed.txt
+
+  else
+    # BASE MODELS
+
+    # mistral, mixtral -> no sys; query
+    python -u evaluate.py \
+      --simulated-population-type $population_type \
+      --simulate-conversation-theme $theme \
+      --simulated-human-knows-persona \
+      --simulated-conversation-n-messages $n_msgs \
+      --permute-options \
+      --permute-options-seed "$permute_options_seed" \
+      --format chat \
+      --save_dir $SAVE_DIR \
+      --engine "$engine" \
+      --query-in-reply \
+      --system-message \
+      --base-model-template \
+      --data_dir data/$data_dir \
+      --experiment_name $experiment_name \
+      --pvq-version "pvq_auto" \
+      --no-profile \
+      --assert-params \
+      --verbose  2>&1 | tee -a $LOG_DIR/log_$permute_options_seed.txt
+  fi
+
+elif [[ $engine == *"phi"* ]] || [[ $engine == "Qwen-"*"B" ]]; then
+
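+    # note: the "Qwen-"*"B" glob matches only base Qwen models; chat variants
+    # (e.g. Qwen-72B-Chat) fall through to the final "Undefined engine" branch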
+    # all phi models and base Qwen models use the base-model template
+    python -u evaluate.py \
+      --simulated-population-type $population_type \
+      --simulate-conversation-theme $theme \
+      --simulated-human-knows-persona \
+      --simulated-conversation-n-messages $n_msgs \
+      --permute-options \
+      --permute-options-seed "$permute_options_seed" \
+      --format chat \
+      --save_dir $SAVE_DIR \
+      --engine "$engine" \
+      --query-in-reply \
+      --system-message \
+      --base-model-template \
+      --data_dir data/$data_dir \
+      --experiment_name $experiment_name \
+      --pvq-version "pvq_auto" \
+      --no-profile \
+      --assert-params \
+      --verbose  2>&1 | tee -a $LOG_DIR/log_$permute_options_seed.txt
+
+elif [[ $engine == *"zephyr"* ]] || [[ $engine == *"llama_2"* ]] || [[ $engine == "dummy" ]]; then
+
+  echo "Zephyr or LLaMa: $engine"
+
+  if [[ $engine == *"llama_2"* ]] && [[ $engine != *"chat"* ]]; then
+    # BASE MODELS
+
+    # llama_base_model
+    python -u evaluate.py \
+      --simulated-population-type $population_type \
+      --simulate-conversation-theme $theme \
+      --simulated-human-knows-persona \
+      --simulated-conversation-n-messages $n_msgs \
+      --permute-options \
+      --permute-options-seed "$permute_options_seed" \
+      --format chat \
+      --save_dir $SAVE_DIR \
+      --engine "$engine" \
+      --query-in-reply \
+      --system-message \
+      --base-model-template \
+      --data_dir data/$data_dir \
+      --experiment_name $experiment_name \
+      --pvq-version "pvq_auto" \
+      --no-profile \
+      --assert-params \
+      --verbose  2>&1 | tee -a $LOG_DIR/log_$permute_options_seed.txt
+
+  else
+
+    # INSTRUCT, DPO models
+    # zephyr, llama -> sys ; query
+    python -u evaluate.py \
+      --simulated-population-type $population_type \
+      --simulate-conversation-theme $theme \
+      --simulated-human-knows-persona \
+      --simulated-conversation-n-messages $n_msgs \
+      --permute-options \
+      --permute-options-seed "$permute_options_seed" \
+      --format chat \
+      --save_dir $SAVE_DIR \
+      --engine "$engine" \
+      --query-in-reply \
+      --system-message \
+      --data_dir data/$data_dir \
+      --experiment_name $experiment_name \
+      --pvq-version "pvq_auto" \
+      --no-profile \
+      --assert-params \
+      --verbose  2>&1 | tee -a $LOG_DIR/log_$permute_options_seed.txt
+
+  fi
+
+
+elif [[ $engine == *"gpt"* ]] ; then
+
+  echo "GPTs: $engine"
+
+  # gpts -> sys ; no query
+  python -u evaluate.py \
+    --simulated-population-type $population_type \
+    --simulate-conversation-theme $theme \
+    --simulated-human-knows-persona \
+    --simulated-conversation-n-messages $n_msgs \
+    --permute-options-seed "$permute_options_seed" \
+    --permute-options \
+    --format chat \
+    --save_dir $SAVE_DIR \
+    --engine "$engine" \
+    --system-message \
+    --data_dir data/$data_dir \
+    --experiment_name $experiment_name \
+    --pvq-version "pvq_auto" \
+    --no-profile \
+    --assert-params \
+    --verbose  2>&1 | tee -a $LOG_DIR/log_$permute_options_seed.txt
+
+
+else
+  echo "Undefined engine: $engine"
+fi
\ No newline at end of file
diff --git a/vis_pop_values.py b/vis_pop_values.py
new file mode 100644
index 0000000..5c1c3ad
--- /dev/null
+++ b/vis_pop_values.py
@@ -0,0 +1,281 @@
+import argparse
+from collections import Counter
+import json
+import sys
+import os
+import numpy as np
+from itertools import chain
+import matplotlib.pyplot as plt
+from sklearn.decomposition import PCA
+from sklearn.preprocessing import StandardScaler
+from matplotlib.lines import Line2D
+
+# bad_guys = [
+#     "Gollum", "Sauron", "Saruman", "Smaug", "Morgoth", "Shelob", "Gríma Wormtongue",
+#     "Ungoliant", "Gothmog (Balrog)", "Lungorthin", "Durin's Bane", "Thuringwethil"
+# ]
+#
+# good_guys = [
+#     "Gandalf", "Aragorn", "Celeborn", "Galadriel", "Tom Bombadil", "Elrond",
+#     "Frodo Baggins", "Finrod Felagund", "Glorfindel", "Goldberry", "Bilbo Baggins",
+#     "Faramir", "Éowyn", "Samwise Gamgee", "Fëanor", "Théoden", "Boromir", "Túrin Turambar",
+#     "Thranduil", "Beorn", "Arwen", "Halbarad", "Fingon", "Fingolfin", "Celebrimbor", "Gil-galad",
+#     "Meriadoc Brandybuck", "Treebeard", "Radagast", "Elendil", "Éomer", "Legolas", "Húrin",
+#     "Thorin Oakenshield", "Peregrin Took", "Thingol", "Eärendil", "Elwing", "Lúthien", "Beren",
+#     "Tuor", "Idril", "Finwë", "Míriel", "Bard the Bowman", "Melian", "Balin", "Gimli",
+# ]
+#
+# neutral_guys = [
+#     "Smeagol",
+#     "Sméagol",
+#     "Maedhros",
+#     "Isildur",
+#     "Denethor",
+#     "Watcher in the Water",
+# ]
+
+# GPT-4 classification
+good_guys = [
+    "Gandalf", "Aragorn", "Celeborn", "Galadriel", "Tom Bombadil", "Elrond", "Frodo Baggins", 
+    "Finrod Felagund", "Glorfindel", "Goldberry", "Bilbo Baggins", "Faramir", "Éowyn", 
+    "Samwise Gamgee", "Théoden", "Thranduil", "Beorn", "Arwen", "Halbarad", "Celebrimbor", 
+    "Gil-galad", "Meriadoc Brandybuck", "Treebeard", "Radagast", "Elendil", "Éomer", "Legolas", 
+    "Thorin Oakenshield", "Peregrin Took", "Eärendil", "Elwing", "Lúthien", "Beren", "Tuor", 
+    "Idril", "Finwë", "Míriel", "Melian", "Gimli"
+]
+
+neutral_guys = [
+    "Sméagol", "Maedhros", "Fëanor", "Boromir", "Túrin Turambar", "Isildur", "Denethor", "Húrin", "Thingol"
+]
+
+bad_guys = [
+    "Gollum", "Sauron", "Saruman", "Smaug", "Morgoth", "Gothmog (Balrog)", "Lungorthin", 
+    "Shelob", "Gríma Wormtongue", "Ungoliant", "Thuringwethil", "Durin's Bane"
+]
+
+values = [
+    'Conformity',
+    'Tradition',
+    'Benevolence',
+    'Universalism',
+    'Self-Direction',
+    'Stimulation',
+    'Hedonism',
+    'Achievement',
+    'Power',
+    'Security'
+]
+
+def process_json(json_path):
+    json_data = load_json_file(json_path)
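+    # score each value relative to this participant's mean answer
+    # (mean-rating centering, standard practice for PVQ scoring)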
+
+    average_per_part_answer = [
+        np.array(answers['pvq_auto'])[:, 1].astype(float).mean() for answers in json_data["answers"]
+    ]
+
+    return {
+        name: [
+            metrics['pvq_auto'][v] - avg_part_answer for v in values
+        ] for name, metrics, avg_part_answer in zip(
+            json_data['simulated_population'],
+            json_data['per_simulated_participant_metrics'],
+            average_per_part_answer
+        )
+    }
+
+
+def find_most_common_element(lst):
+    if not lst:
+        return None
+
+    count = Counter(lst)
+    most_common = count.most_common(1)[0][0]
+    return most_common
+
+
+# Function to load a JSON file
+def load_json_file(file_path):
+    try:
+        with open(file_path, 'r') as file:
+            data = json.load(file)
+            return data
+    except FileNotFoundError:
+        print(f"File not found: {file_path}")
+    except json.JSONDecodeError:
+        print(f"Error decoding JSON in file: {file_path}")
+    except Exception as e:
+        print(f"An error occurred while processing {file_path}: {e}")
+
+
+ignore_patterns = [
+    # "tax",
+    # "vacation",
+    # "religion",
+    "format_chat___",
+]
+
+# Function to process directories and load JSON files
+def process_directories(dirs):
+
+    to_process = []
+    for dir in dirs:
+        if any([pat in dir for pat in ignore_patterns]):
+            continue
+
+        if os.path.isdir(dir):
+            for filename in os.listdir(dir):
+                if filename.endswith('.json'):
+                    file_path = os.path.join(dir, filename)
+                    to_process.append(file_path)
+
+
+        elif os.path.isfile(dir) and dir.endswith('.json'):
+            to_process.append(dir)
+
+        else:
+            print(f"{dir} is not a valid directory or JSON file")
+
+    participant_values_ = []
+    for file_path in to_process:
+        participant_values_.append(process_json(file_path))
+
+    names = list(participant_values_[0].keys())
+
+    # merge contexts
+    participant_values = {}
+    for name in names:
+        participant_values[name] = np.array(list(chain(*[part_v[name] for part_v in participant_values_])))
+
+    return participant_values
+
+
+# Main function
+def main():
+    parser = argparse.ArgumentParser(description='Process some directories.')
+    parser.add_argument('dirs', nargs='*', help='Directories to process')
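+    # usage sketch: python vis_pop_values.py <results_dir> [<results_dir> ...]
+    # each directory is scanned for the .json result files to aggregate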
+
+    # Parse the arguments
+    args = parser.parse_args()
+    participant_values = process_directories(args.dirs)
+
+    names = list(participant_values.keys())
+
+    data = np.stack([participant_values[n] for n in names])
+
+    values_names = ["Power", "Achievement", "Benevolence", "Universalism", "Tradition", "Conformity", "Security", "Self-Direction", "Stimulation", "Hedonism"]
+
+    # min_ = np.min(data)
+    # max_ = np.max(data)
+    # mean_ = np.mean(data)
+    # var_ = np.var(data)
+    #
+    # vals = []
+    # # todo: make pairs correct -> sometimes 3, sometimes 2, etc.
+    values_names_pairs = list(zip(values_names[::2], values_names[1::2]))
+
+    # values_names_pairs = [["Power", "Achievement"], ["Benevolence", "Universalism"]]
+    # names = names+values_names_pairs
+    # for val_names in values_names_pairs:
+    #     vals.append(np.array([4 if v in val_names else -1 for v in values] * 5))
+
+    # names = names+values_names
+    # for val_name in values_names:
+    #     # 6 1
+    #     vals.append(np.array([4.5 if v == val_name else -0.5 for v in values] * 5))
+    #     # 6 3.5
+    #     # vals.append(np.array([2.25 if v == val_name else -0.25 for v in values] * 5))
+
+    # data = np.vstack([data, np.array(vals)])
+
+    # powe, ach, ben, uni
+    # data = np.hstack([data[:,i::10] for i in range(4)])
+
+    pca = PCA(n_components=2)
+    print(f"Data shape: {data.shape}")
+    data = StandardScaler().fit_transform(data)
+    data_trans = pca.fit_transform(data)
+
+    # pca_trans = data_trans[:-10]
+    # vals_trans = data_trans[-10:]
+    pca_trans = data_trans
+
+    R2_1, R2_2 = pca.explained_variance_ratio_
+    pca_1 = pca_trans[:, 0]
+    pca_2 = pca_trans[:, 1]
+
+    plt.figure(figsize=(10, 10))
+    plt.xticks(fontsize=20)
+    plt.yticks(fontsize=20)
+
+    plt.xlabel(r'PC 1 ($R^{2}=' + f'{R2_1:.2f}' + '$)', fontsize=25)
+    plt.ylabel(r'PC 2 ($R^{2}=' + f'{R2_2:.2f}' + '$)', fontsize=25)
+
+    text_fontsize=10
+
+    colors = []
+
+    for name in names:
+        if name in bad_guys:
+            color = "red"
+        elif name in good_guys:
+            color = "green"
+        elif name in values_names or name in values_names_pairs:
+            color = "blue"
+        else:
+            color = "black"
+
+        colors.append(color)
+
+    plt.scatter(pca_1, pca_2, c=colors, s=50)
+
+    for x, y, n in zip(pca_1, pca_2, names):
+        plt.text(x+0.15, y+0.15, n, fontsize=text_fontsize)
+
+    # for val_name, val_trans in zip(values_names, vals_trans):
+    #     plt.scatter([val_trans[0]], [val_trans[1]], marker="x")
+    #     plt.text(val_trans[0], val_trans[1], val_name)
+
+    # plt.scatter([power_pca[0]], [power_pca[1]], c="red", marker="x")
+    # plt.scatter([ben_pca[0]], [ben_pca[1]], c="green", marker="x")
+    # plt.scatter([tr_pca[0]], [tr_pca[1]], c="yellow", marker="x")
+    # plt.scatter([hed_pca[0]], [hed_pca[1]], c="blue", marker="x")
+
+    # plt.text(power_pca[0], power_pca[1], "power")
+    # plt.text(ben_pca[0], ben_pca[1], "benevolence")
+    # plt.text(tr_pca[0], tr_pca[1], "tradition")
+    # plt.text(hed_pca[0], hed_pca[1], "hedonism")
+
+    # plt.legend(targets, prop={'size': 15})
+    # plt.show()
+
+    legend_elements = [
+        Line2D([0], [0], marker='o', color='w', label='Positive ch.', markerfacecolor='green', markersize=20),
+        Line2D([0], [0], marker='o', color='w', label='Neutral ch.', markerfacecolor='black', markersize=20),
+        Line2D([0], [0], marker='o', color='w', label='Negative ch.', markerfacecolor='red', markersize=20),
+    ]
+
+    # Add the legend to the plot
+    plt.legend(handles=legend_elements, fontsize=23)
+
+    savepath = f"visualizations/pca.svg"
+    plt.savefig(savepath)
+    print(f"Saved to: {savepath}")
+
+    plt.show()
+
+
+if __name__ == "__main__":
+    main()
diff --git a/visualization_scripts/plt_vis_tomi.py b/visualization_scripts/plt_vis_tomi.py
deleted file mode 100644
index a178fbc..0000000
--- a/visualization_scripts/plt_vis_tomi.py
+++ /dev/null
@@ -1,105 +0,0 @@
-import matplotlib.pyplot as plt
-import json
-
-transpose = False
-
-
-def load_from_json(path, subjs, subj_for_macro_avg=None):
-    with open(path, "r") as read_file:
-        accs_json = json.load(read_file)
-
-    subject_accs = [accs_json[s] for s in subjs]
-
-    if subj_for_macro_avg is not None:
-        macro_avg = sum([accs_json[s] for s in subj_for_macro_avg])/len(subj_for_macro_avg)
-        subject_accs += [macro_avg]
-
-    return subject_accs
-
-
-subj_to_label = {
-    "tomi_first_order_no_tom": "First_order TB",  # no tom
-    "tomi_first_order_tom": "First_order FB",  # tom
-    "tomi_memory": "Memory",
-    "tomi_reality": "Reality",
-    "tomi_second_order_no_tom": "Second_order(TB)",  # no tom
-    "tomi_second_order_tom": "Second_order(FB)",  # tom
-    "average": "Avg (micro)"
-}
-
-subj_for_macro_avg = [
-    "tomi_first_order_no_tom",
-    "tomi_first_order_tom",
-    "tomi_memory",
-    "tomi_reality",
-    "tomi_second_order_no_tom",  # no tom
-    "tomi_second_order_tom",  # tom
-]
-
-subjs = ["tomi_first_order_no_tom", "tomi_first_order_tom", "tomi_memory", "tomi_reality", "tomi_second_order_no_tom", "tomi_second_order_tom", "average"]
-
-
-title = "gpt-3.5"
-title = ""
-data = [
-    # x-axis
-    [subj_to_label[s] for s in subjs] + ["Avg (macro)"],
-    [
-        ("default", load_from_json("results/results_tomi_age/tomi_test_gpt-3.5-turbo-0301_data_tomi__2023_04_18_10_58_34/results.json", subjs, subj_for_macro_avg=subj_for_macro_avg)),
-        ("3 yo", load_from_json("results/results_tomi_age/tomi_test_gpt-3.5-turbo-0301_data_tomi_ntrain_0_profile_Age:3_2023_04_18_10_40_13/results.json", subjs, subj_for_macro_avg=subj_for_macro_avg)),
-        ("5 yo", load_from_json("results/results_tomi_age/tomi_test_gpt-3.5-turbo-0301_data_tomi_ntrain_0_profile_Age:5_2023_04_18_11_07_41/results.json", subjs, subj_for_macro_avg=subj_for_macro_avg)),
-        ("10 yo", load_from_json( "results/results_tomi_age/tomi_test_gpt-3.5-turbo-0301_data_tomi_ntrain_0_profile_Age:10_2023_04_18_11_09_52/results.json", subjs, subj_for_macro_avg=subj_for_macro_avg)),
-        ("30 yo", load_from_json("results/results_tomi_age/tomi_test_gpt-3.5-turbo-0301_data_tomi_ntrain_0_profile_Age:30_2023_04_18_11_12_02/results.json", subjs, subj_for_macro_avg=subj_for_macro_avg)),
-    ]
-]
-
-test_names = data[0]
-baselines = [lab for lab, _ in data[1]]
-
-# transpose data
-data_ = [
-    # x-axis
-    baselines,
-    [
-        (test_name, []) for test_name in test_names
-    ]
-]
-for b_i in range(len(baselines)):
-    for test_i in range(len(test_names)):
-        data_[1][test_i][1].append(
-            data[1][b_i][1][test_i]
-        )
-
-# replace data with transposed data
-if transpose:
-    data = data_
-    print("Drawing transposed data (x-axis - baselines).")
-
-
-# invert data
-
-# Extract the data
-labels = data[0]
-models = data[1]
-
-# Create the plot
-fig, ax = plt.subplots(figsize=(14, 6))
-ax.set_ylim(bottom=0.0, top=1.05)
-
-for model_name, model_data in models:
-    x = range(len(labels))
-    y = model_data  # remove the "Avg (micro)" score
-    ax.plot(x, y, label=model_name, marker='o', linewidth=3)
-
-# Set the axes labels and title
-ax.set_xticks(range(len(labels)))
-ax.set_xticklabels(labels)
-ax.set_xlabel("Subjects")
-ax.set_ylabel("Accuracy")
-ax.set_title(title)
-
-# Add a legend
-ax.legend()
-
-# Display the plot
-plt.show()
diff --git a/visualization_scripts/scatter_plot_tomi_old.py b/visualization_scripts/scatter_plot_tomi_old.py
deleted file mode 100644
index 63b117f..0000000
--- a/visualization_scripts/scatter_plot_tomi_old.py
+++ /dev/null
@@ -1,244 +0,0 @@
-import matplotlib.pyplot as plt
-import json
-
-transpose = False
-
-# # k=3
-# data = [
-#     [
-#         "First_order(TB)",  # no tom
-#         "First_order(FB)",  # tom
-#         "Memory",
-#         "Reality",
-#         "Second_order(TB)",  # no tom
-#         "Second_order(FB)",  # tom
-#         "Avg (micro)"
-#     ],
-#     [
-#         ("ChatGPT"  , [0.973, 0.909, 1.000, 1.000, 0.917, 0.542, 0.875]),
-#         ("LLaMa 7B" , [0.811, 1.000, 1.000, 0.833, 0.917, 0.917, 0.892]),
-#         ("LLaMa 13B", [0.973, 1.000, 1.000, 1.000, 0.833, 0.792, 0.917]),
-#         ("LLaMa 30B", [0.946, 1.000, 1.000, 0.958, 0.875, 0.917, 0.938]),
-#     ]
-# ]
-
-# k=1 (in-context 50 years old)
-
-
-def load_from_json(path, subjs, subj_for_macro_avg=None):
-    with open(path, "r") as read_file:
-        accs_json = json.load(read_file)
-
-    subject_accs = [accs_json[s] for s in subjs]
-
-    if subj_for_macro_avg is not None:
-        macro_avg = sum([accs_json[s] for s in subj_for_macro_avg])/len(subj_for_macro_avg)
-        subject_accs += [macro_avg]
-
-    return subject_accs
-
-
-# subj_to_label = {
-#     "tomi_first_order_no_tom": "First_order TB (74)",  # no tom
-#     "tomi_first_order_tom": "First_order FB (22)",  # tom
-#     "tomi_memory": "Memory (48)",
-#     "tomi_reality": "Reality (48)",
-#     "tomi_second_order_no_tom": "Second_order(TB) (70)",  # no tom
-#     "tomi_second_order_tom": "Second_order(FB) (26)",  # tom
-#     "average": "Avg (micro)"
-# }
-subj_to_label = {
-    "tomi_first_order_no_tom": "First_order TB",  # no tom
-    "tomi_first_order_tom": "First_order FB",  # tom
-    "tomi_memory": "Memory",
-    "tomi_reality": "Reality",
-    "tomi_second_order_no_tom": "Second_order(TB)",  # no tom
-    "tomi_second_order_tom": "Second_order(FB)",  # tom
-    "average": "Avg (micro)"
-}
-
-subj_for_macro_avg = [
-    "tomi_first_order_no_tom",
-    "tomi_first_order_tom",
-    "tomi_memory",
-    "tomi_reality",
-    "tomi_second_order_no_tom",  # no tom
-    "tomi_second_order_tom",  # tom
-]
-
-subjs = ["tomi_first_order_no_tom", "tomi_first_order_tom", "tomi_memory", "tomi_reality", "tomi_second_order_no_tom", "tomi_second_order_tom", "average"]
-
-
-title = "gpt-3.5"
-title = ""
-data = [
-    # x-axis
-    [subj_to_label[s] for s in subjs] + ["Avg (macro)"],
-    # torchrun --nproc_per_node 4 evaluate.py -k 0 -d data_tomi_50_mcq_shuf -e llama_30B -n tomi_default_new
-    # torchrun --nproc_per_node 4 evaluate.py -k 0 -d data_tomi_50_mcq_shuf -e llama_30B -n tomi_age_<>_new
-    #     ("3 yo", load_from_json("results/tomi_age_0_new_llama_30B_data_tomi_50_mcq_shuf_ntrain_0/results.json", subjs, subj_for_macro_avg=subj_for_macro_avg)),
-
-    # # LLama
-    [
-        # ("random", load_from_json("results/tomi_default_new_dummy_data_tomi_50_mcq_shuf_ntrain_0/results.json", subjs, subj_for_macro_avg=subj_for_macro_avg)),
-        # ("default", load_from_json("results/tomi_default_new_llama_30B_data_tomi_50_mcq_shuf_ntrain_0/results.json", subjs, subj_for_macro_avg=subj_for_macro_avg)),
-        ("3 yo", load_from_json("results/tomi_age_0_new_llama_30B_data_tomi_50_mcq_shuf_ntrain_0/results.json", subjs, subj_for_macro_avg=subj_for_macro_avg)),
-        ("5 yo", load_from_json("results/tomi_age_1_new_llama_30B_data_tomi_50_mcq_shuf_ntrain_0/results.json", subjs, subj_for_macro_avg=subj_for_macro_avg)),
-        ("10 yo", load_from_json("results/tomi_age_2_new_llama_30B_data_tomi_50_mcq_shuf_ntrain_0/results.json", subjs, subj_for_macro_avg=subj_for_macro_avg)),
-        ("15 yo", load_from_json("results/tomi_age_3_new_llama_30B_data_tomi_50_mcq_shuf_ntrain_0/results.json", subjs, subj_for_macro_avg=subj_for_macro_avg)),
-        ("30 yo", load_from_json("results/tomi_age_4_new_llama_30B_data_tomi_50_mcq_shuf_ntrain_0/results.json", subjs, subj_for_macro_avg=subj_for_macro_avg)),
-        ("50 yo", load_from_json("results/tomi_age_5_new_llama_30B_data_tomi_50_mcq_shuf_ntrain_0/results.json", subjs, subj_for_macro_avg=subj_for_macro_avg)),
-    ],
-    #
-    # # Chat-GPT
-    # [
-    #     # ("default", load_from_json("results/tomi_default_new_gpt-3.5-turbo_data_tomi_50_mcq_shuf_ntrain_0/results.json", subjs, subj_for_macro_avg=subj_for_macro_avg)),
-    #     ("3 yo", load_from_json("results/tomi_age_0_new_gpt-3.5-turbo_data_tomi_50_mcq_shuf_ntrain_0/results.json", subjs, subj_for_macro_avg=subj_for_macro_avg)),
-    #     ("5 yo", load_from_json("results/tomi_age_1_new_gpt-3.5-turbo_data_tomi_50_mcq_shuf_ntrain_0/results.json", subjs, subj_for_macro_avg=subj_for_macro_avg)),
-    #     ("10 yo", load_from_json("results/tomi_age_2_new_gpt-3.5-turbo_data_tomi_50_mcq_shuf_ntrain_0/results.json", subjs, subj_for_macro_avg=subj_for_macro_avg)),
-    #     ("15 yo", load_from_json("results/tomi_age_3_new_gpt-3.5-turbo_data_tomi_50_mcq_shuf_ntrain_0/results.json", subjs, subj_for_macro_avg=subj_for_macro_avg)),
-    #     ("30 yo", load_from_json("results/tomi_age_4_new_gpt-3.5-turbo_data_tomi_50_mcq_shuf_ntrain_0/results.json", subjs, subj_for_macro_avg=subj_for_macro_avg)),
-    # ],
-]
-# # MCQ (k=0) 1000 test
-# [
-#     ("3 years old" , []),
-#     # ("5 years old" , []),
-#     # ("10 years old", []),
-#     # ("15 years old", []),
-#     ("30 years old", []),
-# ],
-# # MCQ (k=0) 250 test
-# [
-#     ("3 years old" , [0.020, 0.959, 1.000, 0.072, 0.006, 0.984, 0.373]),
-#     # ("5 years old" , []),
-#     # ("10 years old", []),
-#     # ("15 years old", []),
-#     ("30 years old", [0.289, 0.722, 1.000, 0.361, 0.068, 0.942, 0.485]),
-# ],
-
-
-########
-########
-##  the most important experiment
-# MCQ (k=0) 50 test
-# [
-#     ("default (k=0)", [0.243, 0.727, 0.938, 0.104, 0.229, 0.615, 0.403]),
-#     # ("default-no undef (k=0)", [0.838, 0.091, 0.458, 0.833, 0.829, 0.385, 0.674]),
-#     ("3 years old (k=0)", [0.014, 1.000, 1.000, 0.042, 0.000, 1.000, 0.344]),
-#     ("5 years old (k=0)", [0.108, 0.955, 1.000, 0.146, 0.029, 1.000, 0.389]),
-#     ("10 years old (k=0)", [0.365, 0.818, 0.958, 0.479, 0.129, 0.923, 0.510]),
-#     ("15 years old (k=0)", [0.432, 0.636, 0.979, 0.417, 0.100, 0.923, 0.500]),
-#     ("30 years old (k=0)", [0.432, 0.636, 0.979, 0.438, 0.071, 0.962, 0.500]),
-# ],
-
-# # MCQ (k=0) 50 test - NO UNDEF options presented
-# [
-#     # ("default undef (k=0)", [0.243, 0.727, 0.938, 0.104, 0.229, 0.615, 0.403]),
-#     ("default (k=0)", [0.838, 0.091, 0.458, 0.833, 0.829, 0.385, 0.674]),
-#     ("3 years old (k=0)", [0.027, 1.000, 1.000, 0.312, 0.014, 1.000, 0.396]),
-#     ("5 years old (k=0)", [0.176, 0.955, 1.000, 0.458, 0.043, 0.962, 0.458]),
-#     ("10 years old (k=0)", [0.419, 0.682, 1.000, 0.646, 0.186, 0.846, 0.556]),
-#     ("15 years old (k=0)", [0.514, 0.545, 1.000, 0.646, 0.243, 0.731, 0.573]),
-#     ("30 years old (k=0)", [0.581, 0.409, 1.000, 0.750, 0.257, 0.846, 0.611]),
-#     ("50 years old (k=0)", [0.432, 0.455, 1.000, 0.708, 0.214, 0.885, 0.562]),
-#     ("dog (k=0)", [0.419, 0.364, 0.917, 0.896, 0.143, 0.769, 0.542]),
-# ],
-
-# MCQ (k=0) 50 test - "This is a questionnaire" - NO UNDEF options presented
-# [
-#     # ("default (k=0)", [0.743, 0.273, 0.958, 0.875, 0.500, 0.615, 0.694]),
-#     ("3 years old (k=0)", [0.811, 0.227, 0.917, 0.750, 0.457, 0.538, 0.663]),
-#     ("5 years old (k=0)", [0.865, 0.045, 0.833, 0.938, 0.671, 0.423, 0.722]),
-#     ("10 years old (k=0)", [0.919, 0.045, 0.771, 0.958, 0.729, 0.385, 0.740]),
-#     ("15 years old (k=0)", [0.892, 0.091, 0.750, 0.938, 0.700, 0.423, 0.726]),
-#     ("30 years old (k=0)", [0.959, 0.045, 0.271, 1.000, 0.857, 0.077, 0.677]),
-#     ("50 years old (k=0)", [0.946, 0.045, 0.333, 1.000, 0.886, 0.192, 0.701]),
-# ],
-########
-########
-
-# MCQ (k=5) 50 test
-# [
-#     ("default (k=5)", [1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000]),
-#     ("3 years old (k=5)", [0.986, 1.000, 1.000, 1.000, 1.000, 1.000, 0.997]),
-#     ("10 years old (k=5)", [1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000]),
-#     ("30 years old (k=5)", [1.000, 1.000, 1.000, 1.000, 1.000, 1.000, 1.000]),
-# ],
-
-# # MCQ (k=1) 50 test
-# [
-#     ("default (k=1)" , [0.986, 0.045, 0.062, 0.979, 0.957, 0.000, 0.663]),
-#     ("3 years old (k=1)" , [0.703, 0.682, 0.771, 0.771, 0.643, 0.269, 0.670]),
-#     ("5 years old (k=1)" , [0.851, 0.455, 0.583, 0.875, 0.786, 0.077, 0.694]),
-#     ("15 years old (k=1)", [0.986, 0.045, 0.146, 0.979, 0.943, 0.000, 0.674]),
-#     ("30 years old (k=1)", [0.986, 0.045, 0.062, 1.000, 0.986, 0.000, 0.674]),
-# ],
-
-
-# GQA (k=1, 50yo)
-# [
-#     ("3 years old" , [0.919, 0.818, 1.000, 0.917, 0.875, 0.583, 0.861]),
-#     ("5 years old" , [0.973, 0.909, 1.000, 0.917, 0.875, 0.583, 0.882]),
-#     ("10 years old", [0.973, 0.909, 1.000, 0.917, 0.917, 0.542, 0.882]),
-#     ("15 years old", [0.973, 0.909, 1.000, 0.917, 0.875, 0.542, 0.875]),
-#     ("30 years old", [0.973, 0.909, 1.000, 0.917, 0.875, 0.542, 0.875]),
-# ]
-# GQA (k=3, 50yo)
-# [
-#     ("3 years old" , [0.973, 0.909, 1.000, 0.958, 0.917, 0.917, 0.951]),
-#     ("10 years old", [1.000, 0.909, 1.000, 0.958, 0.958, 0.833, 0.951]),
-#     ("30 years old", [1.000, 1.000, 1.000, 0.958, 0.917, 0.833, 0.951]),
-# ]
-# ]
-
-test_names = data[0]
-baselines = [lab for lab, _ in data[1]]
-
-# transpose data
-data_ = [
-    # x-axis
-    baselines,
-    [
-        (test_name, []) for test_name in test_names
-    ]
-]
-for b_i in range(len(baselines)):
-    for test_i in range(len(test_names)):
-        data_[1][test_i][1].append(
-            data[1][b_i][1][test_i]
-        )
-
-# replace data with transposed data
-if transpose:
-    data = data_
-    print("Drawing transposed data (x-axis - baselines).")
-
-
-# invert data
-
-# Extract the data
-labels = data[0]
-models = data[1]
-
-# Create the plot
-fig, ax = plt.subplots(figsize=(14, 6))
-ax.set_ylim(bottom=0.0, top=1.05)
-from IPython import embed; embed()
-for model_name, model_data in models:
-    x = range(len(labels))
-    y = model_data  # remove the "Avg (micro)" score
-    ax.plot(x, y, label=model_name, marker='o', linewidth=3)
-
-# Set the axes labels and title
-ax.set_xticks(range(len(labels)))
-ax.set_xticklabels(labels)
-ax.set_xlabel("Subjects")
-ax.set_ylabel("Accuracy")
-ax.set_title(title)
-
-# Add a legend
-ax.legend()
-
-# Display the plot
-plt.show()
diff --git a/visualization_scripts/wvs_analysis.py b/visualization_scripts/wvs_analysis.py
new file mode 100644
index 0000000..8725ea6
--- /dev/null
+++ b/visualization_scripts/wvs_analysis.py
@@ -0,0 +1,353 @@
+import os
+import json
+import warnings
+import random
+
+import matplotlib.pyplot as plt
+import re
+import numpy as np
+from termcolor import colored
+import scipy.stats as stats
+import itertools
+from scipy.stats import tukey_hsd, sem, rankdata
+from scipy.stats import pearsonr, spearmanr, ConstantInputWarning
+from collections import defaultdict
+
+from scipy.spatial import distance
+
+
+def extract_test_set_name(dir_2_data):
+    test_set_names = set(itertools.chain(*[v['metrics'].keys() for v in dir_2_data.values()]))
+
+    assert len(test_set_names) == 1
+    test_set_name = list(test_set_names)[0]
+    return test_set_name
+
+
+def extract_test_set_values(dir_2_data):
+    test_set_values_ = [list(list(v['metrics'].values())[0].keys()) for v in dir_2_data.values()]
+    # every result file must report the same ordered set of values
+    assert len(set(repr(t) for t in test_set_values_)) == 1
+    test_set_values = test_set_values_[0]
+
+    return test_set_values
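+
+# Layout of results.json assumed by the accessors in this file (a sketch
+# inferred from the code below, not a guaranteed schema):
+#   {"metrics": {"<test_set>": {"<value>": <score>, ...}},
+#    "per_permutation_metrics": [{"<test_set>": {...}}, ...],
+#    "per_simulated_participant_metrics": [{"<test_set>": {...}}, ...],
+#    "pop_metrics": {"<population>": {"hist": [{"<answer>": <count>, ...}, ...]}}}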
+
+
+def load_data(directories):
+    # load data
+    data = {}
+    for i, directory in enumerate(directories):
+        if not os.path.isdir(directory):
+            continue
+
+        results_json_path = os.path.join(directory, 'results.json')
+
+        if not os.path.isfile(results_json_path):
+            continue
+
+        with open(results_json_path, 'r') as f:
+            dir_data = json.load(f)
+
+        test_name = extract_test_set_name({directory: dir_data})
+
+        # parse tolkien metrics
+
+        # races are separated into fair/unfair by default
+        parse_metrics = True  # results are better with the fair/unfair split
+        parse_good_bad = False  # results are better without the good/bad split
+
+        if parse_metrics:
+
+            def parse_dir(d):
+                # Hook for per-questionnaire metric parsing (e.g. splitting
+                # races into fair/unfair); every branch currently returns the
+                # metrics unchanged.
+                if "tolkien_donation" in directory:
+                    return d
+                elif "pvq" in directory:
+                    return d
+                else:
+                    return d
+
+            dir_data['metrics'][test_name] = parse_dir(dir_data['metrics'][test_name])
+
+            dir_data['per_permutation_metrics'] = [
+                {test_name: parse_dir(d[test_name])} for d in dir_data['per_permutation_metrics']
+            ]
+            dir_data['per_simulated_participant_metrics'] = [
+                {test_name: parse_dir(d[test_name])} for d in dir_data['per_simulated_participant_metrics']
+            ]
+
+        data[directory] = dir_data
+
+    return data
+
+
+def is_strictly_increasing(lst):
+    # strict comparison: equal consecutive values do not count as increasing
+    return all(x < y for x, y in zip(lst, lst[1:]))
+
+
+def print_dict_values(d):
+    # header row: the keys plus a "Mean" column; value row: scores and their mean
+    print("\t".join([f"{k:<10}" for k in list(d.keys()) + ["Mean"]]))
+    print("\t\t".join([f"{s:.2f}" for s in list(d.values()) + [np.mean(list(d.values()))]]))
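+# e.g. print_dict_values({"a": 0.5, "b": 0.25}) prints two rows, keys padded
+# to width 10 plus a trailing "Mean" column:
+#   a          b          Mean
+#   0.50    0.25    0.38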
+
+
+def dir_to_label(directory):
+
+    if "simulate_conv" in directory:  # also matches "format_chat_simulate_conv"
+        label = extract_value(directory, "_simulate_conv_")
+
+    elif "weather" in directory:
+        label = extract_value(directory, "_weather_")
+
+    elif "no_profile" in directory:
+        label = extract_value(directory, "_format_")
+
+    elif "profile" in directory:
+        label = extract_profile(directory)
+        label = label.replace("Primary values:", "")
+
+    elif "lotr_character" in directory:
+        label = extract_value(directory, "_lotr_character_")
+    elif "text_type" in directory:
+        label = extract_value(directory, "_text_type_")
+    elif "music_expert" in directory:
+        label = extract_value(directory, "_music_expert_")
+    elif "music_AI_experts" in directory:
+        label = extract_value(directory, "_music_expert_")
+    elif "hobby" in directory:
+        label = extract_value(directory, "_hobby_")
+    else:
+        label = os.path.basename(directory)
+
+    label = label.strip("_")
+    return label
+
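+# Usage sketch (hypothetical directory name):
+#   dir_to_label(".../res_hobby_chess__2023_01_01") -> "chess"
+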
+
+def extract_value(directory, key="_lotr_character_"):
+    label = os.path.basename(directory)
+    if key in label:
+        start_index = label.find(key) + len(key)
+
+    elif "_ntrain_" in label:
+        start_index = label.find("ntrain_") + len("ntrain_") + 1
+
+    else:
+        start_index = 0
+
+    match = re.search(r"(_+202\d)", label)
+
+    if match:
+        end_index = label.find(match.group(0))
+    else:
+        end_index = len(label)
+
+    label = label[start_index:end_index]
+
+    return label
+
+def extract_profile(directory):
+    label = os.path.basename(directory)
+
+    if "_profile_" in label:
+        start_index = label.find("_profile_") + len("_profile_")
+
+    elif "_ntrain_" in label:
+        start_index = label.find("ntrain_") + len("ntrain_") + 1
+
+    else:
+        start_index = 0
+
+    if "_2023" in label:
+        end_index = label.find("_2023")
+    else:
+        end_index = len(label)
+
+    label = label[start_index:end_index]
+
+    return label
+
+
+def subjects_average(data, subjects_to_average, metric="accuracy"):
+    present_subjects = list(data['metrics'].keys())
+
+    # all subjects to average are present
+    if all(avg_s in present_subjects for avg_s in subjects_to_average):
+        return np.mean([data['metrics'][s][metric] for s in subjects_to_average])
+    else:
+        return None
+
+
+def extract_by_key(directory, key="Hobbies"):
+    if key is None:
+        return os.path.basename(directory)
+
+    pattern = rf'{key}:([^_]+)'
+    match = re.search(pattern, directory)
+    if match:
+        return match.group(1)
+    else:
+        return 'Unknown'
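+
+# Usage sketch (hypothetical directory name):
+#   extract_by_key("res_Hobbies:chess_2023", "Hobbies") -> "chess"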
+
+
+if __name__ == '__main__':
+    import argparse
+
+    # normalized_evaluation_data = []
+    # notnorm_evaluation_data = []
+    vals = []
+
+    parser = argparse.ArgumentParser()
+    parser.add_argument('directories', nargs='+', help='directories containing results.json files')
+    parser.add_argument('--plot-save', '-ps', action="store_true")
+    parser.add_argument('--insert-dummy', '-id', action="store_true")
+    parser.add_argument('--separate_legend', action="store_true")
+    parser.add_argument('--plot-ranks', "-pr", action="store_true")
+    parser.add_argument('--plot-ips', "-pi", action="store_true")
+    parser.add_argument('--plot-mean', "-pm", action="store_true")
+    parser.add_argument('--plot-dont-show', "-pds", action="store_true")
+    parser.add_argument('--filename', type=str, default="hobbies_pvq")
+    parser.add_argument('--assert-n-dirs', type=int, default=None)
+    parser.add_argument('--result-json-stdout', action="store_true")
+    args = parser.parse_args()
+
+    if args.result_json_stdout:
+        # redirect stdout to /dev/null so only the final JSON line is printed
+        import sys
+        sys.stdout = open(os.devnull, 'w')
+
+    different_distr = []  # values/traits for which the ANOVA test indicated different distributions
+
+    keys_to_plot = None
+
+    bar_width = 0.10
+    bar_margin = 1.2
+
+    mean_primary_value_alignment = None
+    spearman = True
+    if spearman:
+        print("Spearman")
+
+    if args.insert_dummy:
+        print("Inserting dummy participants.")
+
+    if args.plot_mean:
+        fig, ax = plt.subplots(figsize=(15, 10))
+
+    ignore = [
+        "religion",
+        "tax",
+        "vacation",
+        # "format_chat___",
+        # "grammar",
+        # "poem",
+        # "joke",
+        # "history",
+        # "chess",
+    ]
+
+    # must_have = ["format_chat___"]  # stricter filter, currently disabled
+    must_have = ["_"]  # every directory name contains "_", i.e. no filtering
+
+    args.directories = [d for d in args.directories if not any([i in d for i in ignore])]
+    args.directories = [
+        d for d in args.directories if any([m in d for m in must_have])
+    ]
+
+    num_dirs = len([d for d in args.directories if os.path.isdir(d)])
+    all_bars_width = num_dirs * (bar_width*bar_margin)  # bars with margins
+
+    # chronological order
+    directories = args.directories
+
+    # remove directories which contain substrings from the list
+    ignore_patterns = []
+    print("Ignoring patterns: ", ignore_patterns)
+
+    for substring in ignore_patterns:
+        directories = [d for d in directories if substring not in d]
+
+    directories = [d for d in directories if os.path.isfile(os.path.join(d, 'results.json'))]
+
+    print("Directories:\n\t", "\n\t".join(directories))
+
+    if len(directories) < 2:
+        raise IOError(f"Only {len(directories)} results.json files found.")
+
+    if args.assert_n_dirs and (len(directories) != args.assert_n_dirs):
+        raise ValueError(f"Wrong number of dirs found {len(directories)} != {args.assert_n_dirs}.")
+
+    dir_2_data = load_data(directories)
+
+    test_set_name = extract_test_set_name(dir_2_data)
+    test_set_values = extract_test_set_values(dir_2_data)
+
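+    # Cross-population similarity: for each question, the two populations'
+    # answer histograms are turned into probability vectors over the union of
+    # answer keys and scored with 1 - Jensen-Shannon distance. scipy's
+    # jensenshannon returns the JS *distance* (the square root of the
+    # divergence); with the default base e it peaks at sqrt(ln 2) ~= 0.83,
+    # so this similarity never drops below ~0.17.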
+    pop_1 = "elves"
+    pop_2 = "humans"
+    tot_sim = []
+    for res_dir in directories:  # avoid shadowing the builtin `dir`
+        histrs_1 = dir_2_data[res_dir]['pop_metrics'][pop_1]['hist']
+        histrs_2 = dir_2_data[res_dir]['pop_metrics'][pop_2]['hist']
+
+        pair_sim = []
+        for histr_1, histr_2 in zip(histrs_1, histrs_2):
+            max_key = max(map(int, itertools.chain(histr_1.keys(), histr_2.keys())))
+            keys = list(map(str, range(1, max_key + 1)))
+
+            # normalize each histogram by its own total count
+            N_1 = sum(histr_1.values())
+            N_2 = sum(histr_2.values())
+            probs_1 = [float(histr_1.get(k, 0)) / N_1 for k in keys]
+            probs_2 = [float(histr_2.get(k, 0)) / N_2 for k in keys]
+
+            sim_q = 1 - distance.jensenshannon(probs_1, probs_2)
+            pair_sim.append(sim_q)
+
+        # print(f"----------------------\n{res_dir}\n---> {np.mean(pair_sim)}")
+        tot_sim.append(pair_sim)
+
+    final_sim = np.mean(tot_sim)
+    print(f"Similarity {pop_1} - {pop_2}:", final_sim)
+
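+    # Within-population stability: compare one population's histograms across
+    # every pair of contexts (directories) using the same similarity measure.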
+    pop = "elves"
+    tot_sim = []
+
+    for dir_1, dir_2 in itertools.combinations(directories, 2):
+        histrs_1 = dir_2_data[dir_1]['pop_metrics'][pop]['hist']
+        histrs_2 = dir_2_data[dir_2]['pop_metrics'][pop]['hist']
+
+        pair_sim = []
+        for histr_1, histr_2 in zip(histrs_1, histrs_2):
+            max_key = max(map(int, itertools.chain(histr_1.keys(), histr_2.keys())))
+            keys = list(map(str, range(1, max_key + 1)))
+
+            # normalize each histogram by its own total count
+            N_1 = sum(histr_1.values())
+            N_2 = sum(histr_2.values())
+            probs_1 = [float(histr_1.get(k, 0)) / N_1 for k in keys]
+            probs_2 = [float(histr_2.get(k, 0)) / N_2 for k in keys]
+
+            sim_q = 1 - distance.jensenshannon(probs_1, probs_2)
+            pair_sim.append(sim_q)
+
+        # print(f"----------------------\n{dir_1}\n{dir_2}\n---> {np.mean(pair_sim)}")
+        tot_sim.append(pair_sim)
+
+    final_sim = np.mean(tot_sim)
+    print(f"Similarity {pop} (contexts):", final_sim)
+
+    if args.result_json_stdout:
+        sys.stdout = sys.__stdout__
+
+        outputs = {"Similarity": final_sim}
+
+        class NumpyEncoder(json.JSONEncoder):
+            def default(self, obj):
+                if isinstance(obj, np.ndarray):
+                    return obj.tolist()
+                return json.JSONEncoder.default(self, obj)
+
+        print(json.dumps(outputs, cls=NumpyEncoder))
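+
+# Example invocation (illustrative paths):
+#   python visualization_scripts/wvs_analysis.py results/run_*/ --result-json-stdout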
+
-- 
GitLab