diff --git a/Cargo.lock b/Cargo.lock
index 8aa66181a6ace2d118ed659a0cc6bbf23b4187da..54087f71335425f62b82721c9e822f2c90c7156e 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -719,11 +719,11 @@ checksum = "4271d37baee1b8c7e4b708028c57d816cf9d2434acb33a549475f78c181f6253"
 
 [[package]]
 name = "git2"
-version = "0.16.1"
+version = "0.18.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "ccf7f68c2995f392c49fffb4f95ae2c873297830eb25c6bc4c114ce8f4562acc"
+checksum = "232e6a7bfe35766bf715e55a88b39a700596c0ccfd88cd3680b4cdb40d66ef70"
 dependencies = [
- "bitflags 1.3.2",
+ "bitflags 2.5.0",
  "libc",
  "libgit2-sys",
  "log",
@@ -1086,6 +1086,21 @@ dependencies = [
  "strum_macros",
 ]
 
+[[package]]
+name = "hyperast_query"
+version = "0.1.0"
+dependencies = [
+ "clap 4.5.4",
+ "dotenv",
+ "lazy_static",
+ "log",
+ "pull_request_sanitizer",
+ "rust_utils",
+ "serde",
+ "serde_derive",
+ "serde_json",
+]
+
 [[package]]
 name = "iana-time-zone"
 version = "0.1.60"
@@ -1273,9 +1288,9 @@ checksum = "9c198f91728a82281a64e1f4f9eeb25d82cb32a5de251c6bd1b5154d63a8e7bd"
 
 [[package]]
 name = "libgit2-sys"
-version = "0.14.2+1.5.1"
+version = "0.16.2+1.7.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "7f3d95f6b51075fe9810a7ae22c7095f12b98005ab364d8544797a825ce946a4"
+checksum = "ee4126d8b4ee5c9d9ea891dd875cfdc1e9d0950437179104b183d7d8a74d24e8"
 dependencies = [
  "cc",
  "libc",
@@ -1287,9 +1302,9 @@ dependencies = [
 
 [[package]]
 name = "libssh2-sys"
-version = "0.2.23"
+version = "0.3.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "b094a36eb4b8b8c8a7b4b8ae43b2944502be3e59cd87687595cf6b0a71b3f4ca"
+checksum = "2dc8a030b787e2119a731f1951d6a773e2280c660f8ec4b0f5e1505a386e71ee"
 dependencies = [
  "cc",
  "libc",
diff --git a/Cargo.toml b/Cargo.toml
index 25770fd85062eff0b5743d3c0efb451c9e7e4d3b..f98bf032d196d1aef36da19b3cf9ad6a3ccf664e 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -4,7 +4,8 @@ members = [
     "extract_edition_script",
     "rust_utils",
     "pull_request_sanitizer",
-    "github_requester"
+    "github_requester",
+    "hyperast_query"
 ]
 
 [workspace.dependencies]
@@ -13,14 +14,16 @@ plot_helper = { git = "https://github.com/fitz35/data_analyze.git", rev = "96d97
 hyper_ast = { path = "../HyperAST/hyper_ast"}
 hyper_diff = { path = "../HyperAST/hyper_diff"}
 hyper_ast_cvs_git = { path = "../HyperAST/cvs/git"}
+hyper_ast_gen_ts_tsquery = { path = "../HyperAST/gen/tree-sitter"}
 
 
 
 clap = { version = "4.2.5", features = ["derive"] }
 
-git2 = { version = "0.16.1", features = ["vendored-libgit2", "vendored-openssl"] }
+git2 = { version = "0.18.2", features = ["vendored-libgit2", "vendored-openssl"] }
 
 
+lazy_static = "1.4.0"
 serde = "1.0.159"
 serde_derive = "1.0.159"
 serde_json = "1.0.94"
diff --git a/extract_edition_script/src/main.rs b/extract_edition_script/src/main.rs
index 5c118429ffb688ee7966ac703ffbaa256fa8105c..af8baaeace9f0323be7e5fd3acf84ebb09f531bc 100644
--- a/extract_edition_script/src/main.rs
+++ b/extract_edition_script/src/main.rs
@@ -1,6 +1,7 @@
 use std::fs;
 
 use log::{info, error};
+use pull_request_sanitizer::load_sanitized_output_cve_data;
 use pull_request_sanitizer::pipeline::SanitizedOutputCveData;
 use rust_utils::logger::common_logger::init_logger;
 
@@ -16,37 +17,8 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
     let argv = get_program_args();
     info!("🚀 Start extraction.");
 
-    let entries = fs::read_dir(&argv.dataset_path).unwrap();
+    let cve_datas : Vec<SanitizedOutputCveData> = load_sanitized_output_cve_data(&argv.dataset_path)?;
 
-    let mut cve_datas : Vec<SanitizedOutputCveData> = Vec::new();
-
-    for entry in entries {
-        let path = match entry {
-            Ok(entry) => entry.path(),
-            Err(_) => continue, // Skip to next iteration if entry is an error
-        };
-    
-        if !path.is_dir() {
-            continue; // Skip to next iteration if path is not a directory
-        }
-    
-        let data_path = path.join("data.json");
-        if !data_path.exists() {
-            continue; // Skip to next iteration if data.json does not exist
-        }
-    
-        let contents =fs::read_to_string(&data_path)?;
-    
-        let data = match serde_json::from_str(&contents) {
-            Ok(data) => data,
-            Err(_) => panic!("Failed to parse data.json"),
-        };
-    
-        cve_datas.push(data);
-    }
-    
-
-    cve_datas.sort_by(|a, b| a.get_cve_id().cmp(&b.get_cve_id()));
 
     for cve_metadata in cve_datas.into_iter() {
         let id = cve_metadata.get_cve_id().to_string();
diff --git a/hyperast_query/Cargo.toml b/hyperast_query/Cargo.toml
new file mode 100644
index 0000000000000000000000000000000000000000..739dde92542c18393282d8155176b123cb5cbbd7
--- /dev/null
+++ b/hyperast_query/Cargo.toml
@@ -0,0 +1,22 @@
+[package]
+name = "hyperast_query"
+version = "0.1.0"
+edition = "2021"
+
+
+
+[dependencies]
+rust_utils = { path = "../rust_utils" }
+pull_request_sanitizer = { path = "../pull_request_sanitizer" }
+
+lazy_static = {workspace = true}
+
+log = {workspace = true}
+
+dotenv = {workspace = true}
+
+clap = {workspace = true}
+
+serde = {workspace = true}
+serde_derive = {workspace = true}
+serde_json = {workspace = true}
\ No newline at end of file
diff --git a/hyperast_query/src/main.rs b/hyperast_query/src/main.rs
new file mode 100644
index 0000000000000000000000000000000000000000..84b5f815cfe364eaf1ac166622f60853a48fd038
--- /dev/null
+++ b/hyperast_query/src/main.rs
@@ -0,0 +1,28 @@
+
+use log::info;
+use pull_request_sanitizer::load_sanitized_output_cve_data;
+use pull_request_sanitizer::pipeline::SanitizedOutputCveData;
+use rust_utils::logger::common_logger::init_logger;
+
+use crate::params::argv::get_program_args;
+
+mod params;
+
+
+
+/// Entry point: loads the sanitized CVE dataset named on the command line (query step still a stub).
+fn main() -> Result<(), Box<dyn std::error::Error>> {
+    // `.ok()` discards the result, so failing to load a `.env` file is not fatal.
+    dotenv::dotenv().ok();
+    init_logger();
+    let argv = get_program_args();
+    info!("🚀 Start testing the hyperast query.");
+
+    let cve_datas : Vec<SanitizedOutputCveData> = load_sanitized_output_cve_data(&argv.dataset_path)?;
+
+    for _cve_data in cve_datas {
+        // TODO: run the HyperAST query against this CVE record.
+    }
+
+    Ok(())
+}
\ No newline at end of file
diff --git a/hyperast_query/src/params/argv.rs b/hyperast_query/src/params/argv.rs
new file mode 100644
index 0000000000000000000000000000000000000000..8cdd89e5229ae75399b65a0e5af006e6f6a9fb17
--- /dev/null
+++ b/hyperast_query/src/params/argv.rs
@@ -0,0 +1,16 @@
+use clap::Parser;
+
+/// Benchmark the query on the hyper ast
+#[derive(Parser, Debug)]
+#[command(author, version, about, long_about = None)]
+pub struct Argv {
+    /// the dataset path to use
+    #[arg(short, long)] // clap derives `-d` / `--dataset-path` here; the `///` line above is the flag's help text
+    pub dataset_path : String,
+}
+
+
+/// Parse the process command line into an [`Argv`]; clap prints an error and exits on invalid arguments.
+pub fn get_program_args() -> Argv {
+    Argv::parse()
+}
\ No newline at end of file
diff --git a/hyperast_query/src/params/mod.rs b/hyperast_query/src/params/mod.rs
new file mode 100644
index 0000000000000000000000000000000000000000..b0be02939b5f455dd7034180e0063be4af3c0611
--- /dev/null
+++ b/hyperast_query/src/params/mod.rs
@@ -0,0 +1,3 @@
+pub mod argv;
+
+
diff --git a/pull_request_sanitizer/src/lib.rs b/pull_request_sanitizer/src/lib.rs
index 578a1fb594d5fcdd2398e55c4e22721d42741920..6229cca93145f279d9ac93133e602e658c34e7c6 100644
--- a/pull_request_sanitizer/src/lib.rs
+++ b/pull_request_sanitizer/src/lib.rs
@@ -1,5 +1,45 @@
+use std::fs;
+use std::path::Path;
+
+use pipeline::SanitizedOutputCveData;
+
 mod params;
 pub mod pipeline;
 mod errors;
 mod utils;
-mod repo;
\ No newline at end of file
+mod repo;
+
+/// Load every `<cve_dir_path>/<entry>/data.json` as a [`SanitizedOutputCveData`], sorted by CVE id.
+///
+/// Unreadable entries, non-directories and directories without `data.json` are skipped.
+///
+/// # Errors
+/// Fails if the directory cannot be listed, or if a `data.json` cannot be read or parsed.
+pub fn load_sanitized_output_cve_data<P : AsRef<Path>>(cve_dir_path : P) -> Result<Vec<SanitizedOutputCveData>, Box<dyn std::error::Error>> {
+    let mut cve_datas : Vec<SanitizedOutputCveData> = Vec::new();
+
+    // `?` instead of `unwrap()`: a missing dataset directory is a caller error, not a bug.
+    for entry in fs::read_dir(&cve_dir_path)? {
+        // Best effort: an entry that cannot be inspected is skipped.
+        let Ok(entry) = entry else { continue };
+
+        let path = entry.path();
+        if !path.is_dir() {
+            continue;
+        }
+
+        let data_path = path.join("data.json");
+        if !data_path.exists() {
+            continue;
+        }
+
+        let contents = fs::read_to_string(&data_path)?;
+        let data = serde_json::from_str(&contents)
+            .map_err(|e| format!("Failed to parse {}: {}", data_path.display(), e))?;
+        cve_datas.push(data);
+    }
+
+    cve_datas.sort_by(|a, b| a.get_cve_id().cmp(&b.get_cve_id()));
+
+    Ok(cve_datas)
+}
\ No newline at end of file
diff --git a/pull_request_sanitizer/src/main.rs b/pull_request_sanitizer/src/main.rs
index ee54a07721cbdaafbd3dfadfd95eaa3c350ece92..69fda20fdff94e60f7eb0e3dbd7fce43fb7973af 100644
--- a/pull_request_sanitizer/src/main.rs
+++ b/pull_request_sanitizer/src/main.rs
@@ -43,7 +43,7 @@ fn main() {
                 continue;
             }
 
-            match pipeline::sanitize_cve(&path) {
+            match pipeline::sanitize_cve(&path, argv.check_in_repo) {
                 Ok((result, patch_data)) => {
                     fs::create_dir_all(&output_cve_folder).unwrap();
                     let output_file = output_cve_folder.join("data.json");
diff --git a/pull_request_sanitizer/src/params/argv.rs b/pull_request_sanitizer/src/params/argv.rs
index ae9ef4aba077a0ad8ef1e88484b7ec9714bc1671..1fc3a63317f0da4d3175413e91afb443e94586ce 100644
--- a/pull_request_sanitizer/src/params/argv.rs
+++ b/pull_request_sanitizer/src/params/argv.rs
@@ -15,6 +15,11 @@ pub struct Argv {
     /// delete the output folder if it already exists
     #[arg(long, action)]
     pub delete_output_folder : bool,
+
+
+    /// check also in the repo itself (using git2, take a lot of time and space)
+    #[arg(long, action)]
+    pub check_in_repo : bool,
 }   
 
 
diff --git a/pull_request_sanitizer/src/pipeline.rs b/pull_request_sanitizer/src/pipeline.rs
index a37099ad41b7d52c77cf0248848cc81c221d0d16..ac627f85f4be7bcd758056354b4d6a01e0f7452b 100644
--- a/pull_request_sanitizer/src/pipeline.rs
+++ b/pull_request_sanitizer/src/pipeline.rs
@@ -55,7 +55,7 @@ impl SanitizedOutputCveData {
 
 
 /// Sanitize a cve directory (return the sanitized data and the patch data)
-pub(crate) fn sanitize_cve<P : AsRef<Path>>(cve_dir_path : P) -> Result<(SanitizedOutputCveData, String), CveSanitizerError> {
+pub(crate) fn sanitize_cve<P : AsRef<Path>>(cve_dir_path : P, check_in_repo : bool) -> Result<(SanitizedOutputCveData, String), CveSanitizerError> {
     let dir_path = cve_dir_path.as_ref();
     
 
@@ -86,11 +86,17 @@ pub(crate) fn sanitize_cve<P : AsRef<Path>>(cve_dir_path : P) -> Result<(Sanitiz
         repo_type, 
         language,
         warnings
-    ) = get_and_and_check_repo_name(&pull_request_data, merge_commit_sha.as_str())?;
+    ) = get_and_and_check_repo_name(&pull_request_data, merge_commit_sha.as_str(), check_in_repo)?;
 
     // prepare the new commit in the repo
-    let repo = load_repo_from_name(&repo_name);
-    let oid = apply_patch_and_retrieve_commit(&repo, merge_commit_sha.as_str(), patch_data.as_str())?;
+    let merged_commit_sha = if check_in_repo {
+        let repo = load_repo_from_name(&repo_name);
+        let oid = apply_patch_and_retrieve_commit(&repo, merge_commit_sha.as_str(), patch_data.as_str())?;
+        oid.to_string()
+    } else {
+        String::new()
+    };
+    
 
 
     
@@ -103,7 +109,7 @@ pub(crate) fn sanitize_cve<P : AsRef<Path>>(cve_dir_path : P) -> Result<(Sanitiz
         language,
         warnings,
         commit_sha : merge_commit_sha.to_string(),
-        reconstructed_merged_commit_sha : oid.to_string()
+        reconstructed_merged_commit_sha : merged_commit_sha,
     };
 
     
diff --git a/pull_request_sanitizer/src/repo/mod.rs b/pull_request_sanitizer/src/repo/mod.rs
index 2b45276de0a08674909ed7eda1bb4f219ad0adb7..337d0603a26cb60c331a34627f7469dc4ed56e83 100644
--- a/pull_request_sanitizer/src/repo/mod.rs
+++ b/pull_request_sanitizer/src/repo/mod.rs
@@ -76,7 +76,8 @@ impl RepoType {
         &self, 
         pull_request_data : &serde_json::Value, 
         commit_to_test : &str,
-        _main_url : &str
+        _main_url : &str,
+        check_in_repo : bool,
     ) -> Result<(String, Option<Language>), CveSanitizerApiWarning> {
         // determine the repo name
         let repo_name = pull_request_data.get_json_value_from_path(
@@ -109,9 +110,10 @@ impl RepoType {
         }
 
         // test if the commit is in the cloned repo
-        let repo = load_repo_from_name(repo_name);
-        let _commit = retrieve_commit(&repo, commit_to_test).map_err(|_| self.get_merge_commit_not_found_in_cloned_repo_warning())?;
-
+        if check_in_repo {
+            let repo = load_repo_from_name(repo_name);
+            let _commit = retrieve_commit(&repo, commit_to_test).map_err(|_| self.get_merge_commit_not_found_in_cloned_repo_warning())?;
+        }
         Ok((repo_name.to_string(), Language::new_from_github_language(language)))
     }
 }
@@ -120,7 +122,8 @@ impl RepoType {
 /// if the commit is in the 2 repos, it will return the head repo
 pub fn get_and_and_check_repo_name(
     pull_request_data : &serde_json::Value, 
-    commit_to_test : &str
+    commit_to_test : &str,
+    check_in_repo : bool,
 ) -> Result<(String, RepoType, Option<Language>, Vec<CveSanitizerApiWarning>), CveSanitizerError> {
     let mut warnings = vec![];
 
@@ -133,11 +136,11 @@ pub fn get_and_and_check_repo_name(
 
     // ----------- head repo ------------
 
-    let head_repo = RepoType::Head.test_repo(pull_request_data, commit_to_test, main_url);
+    let head_repo = RepoType::Head.test_repo(pull_request_data, commit_to_test, main_url, check_in_repo);
 
     // ----------- base repo ------------
 
-    let base_repo = RepoType::Base.test_repo(pull_request_data, commit_to_test, main_url);
+    let base_repo = RepoType::Base.test_repo(pull_request_data, commit_to_test, main_url, check_in_repo);
 
     // collect the results
     if head_repo.is_err() {
diff --git a/rust_utils/src/git_utils/mod.rs b/rust_utils/src/git_utils/mod.rs
index 731d9e571f3daba136c15167daf82143860cb879..8ae1e8f4dbfe51d88c24672f1b99498f5b0deb1d 100644
--- a/rust_utils/src/git_utils/mod.rs
+++ b/rust_utils/src/git_utils/mod.rs
@@ -23,7 +23,9 @@ pub fn apply_patch_and_retrieve_commit<'a>(
 
     repository.set_index(&mut new_index)?; // set the new index
 
-    let new_tree = new_index.write_tree()?; // write the new tree
+    let new_tree_oid = new_index.write_tree()?; // write the new tree
+
+    let new_tree = repository.find_tree(new_tree_oid)?; // find the new tree
 
     let signature = Signature::now("sanitized cve automatique", "clement.lahoche@inria.fr")?; // create a signature