
Commit 49760d8b authored by fitz35

prepare ast queries

parent 755cb099
Branches main
@@ -719,11 +719,11 @@ checksum = "4271d37baee1b8c7e4b708028c57d816cf9d2434acb33a549475f78c181f6253"
[[package]]
name = "git2"
version = "0.16.1"
version = "0.18.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ccf7f68c2995f392c49fffb4f95ae2c873297830eb25c6bc4c114ce8f4562acc"
checksum = "232e6a7bfe35766bf715e55a88b39a700596c0ccfd88cd3680b4cdb40d66ef70"
dependencies = [
"bitflags 1.3.2",
"bitflags 2.5.0",
"libc",
"libgit2-sys",
"log",
@@ -1086,6 +1086,21 @@ dependencies = [
"strum_macros",
]
[[package]]
name = "hyperast_query"
version = "0.1.0"
dependencies = [
"clap 4.5.4",
"dotenv",
"lazy_static",
"log",
"pull_request_sanitizer",
"rust_utils",
"serde",
"serde_derive",
"serde_json",
]
[[package]]
name = "iana-time-zone"
version = "0.1.60"
@@ -1273,9 +1288,9 @@ checksum = "9c198f91728a82281a64e1f4f9eeb25d82cb32a5de251c6bd1b5154d63a8e7bd"
[[package]]
name = "libgit2-sys"
version = "0.14.2+1.5.1"
version = "0.16.2+1.7.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7f3d95f6b51075fe9810a7ae22c7095f12b98005ab364d8544797a825ce946a4"
checksum = "ee4126d8b4ee5c9d9ea891dd875cfdc1e9d0950437179104b183d7d8a74d24e8"
dependencies = [
"cc",
"libc",
@@ -1287,9 +1302,9 @@ dependencies = [
[[package]]
name = "libssh2-sys"
version = "0.2.23"
version = "0.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b094a36eb4b8b8c8a7b4b8ae43b2944502be3e59cd87687595cf6b0a71b3f4ca"
checksum = "2dc8a030b787e2119a731f1951d6a773e2280c660f8ec4b0f5e1505a386e71ee"
dependencies = [
"cc",
"libc",
......
@@ -4,7 +4,8 @@ members = [
"extract_edition_script",
"rust_utils",
"pull_request_sanitizer",
"github_requester"
"github_requester",
"hyperast_query"
]
[workspace.dependencies]
@@ -13,14 +14,16 @@ plot_helper = { git = "https://github.com/fitz35/data_analyze.git", rev = "96d97
hyper_ast = { path = "../HyperAST/hyper_ast"}
hyper_diff = { path = "../HyperAST/hyper_diff"}
hyper_ast_cvs_git = { path = "../HyperAST/cvs/git"}
hyper_ast_gen_ts_tsquery = { path = "../HyperAST/gen/tree-sitter"}
clap = { version = "4.2.5", features = ["derive"] }
git2 = { version = "0.16.1", features = ["vendored-libgit2", "vendored-openssl"] }
git2 = { version = "0.18.2", features = ["vendored-libgit2", "vendored-openssl"] }
lazy_static = "1.4.0"
serde = "1.0.159"
serde_derive = "1.0.159"
serde_json = "1.0.94"
......
use std::fs;
use log::{info, error};
use pull_request_sanitizer::load_sanitized_output_cve_data;
use pull_request_sanitizer::pipeline::SanitizedOutputCveData;
use rust_utils::logger::common_logger::init_logger;
@@ -16,37 +17,8 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
let argv = get_program_args();
info!("🚀 Start extraction.");
let entries = fs::read_dir(&argv.dataset_path).unwrap();
let cve_datas : Vec<SanitizedOutputCveData> = load_sanitized_output_cve_data(&argv.dataset_path)?;
let mut cve_datas : Vec<SanitizedOutputCveData> = Vec::new();
for entry in entries {
let path = match entry {
Ok(entry) => entry.path(),
Err(_) => continue, // Skip to next iteration if entry is an error
};
if !path.is_dir() {
continue; // Skip to next iteration if path is not a directory
}
let data_path = path.join("data.json");
if !data_path.exists() {
continue; // Skip to next iteration if data.json does not exist
}
let contents = fs::read_to_string(&data_path)?;
let data = match serde_json::from_str(&contents) {
Ok(data) => data,
Err(_) => panic!("Failed to parse data.json"),
};
cve_datas.push(data);
}
cve_datas.sort_by(|a, b| a.get_cve_id().cmp(&b.get_cve_id()));
for cve_metadata in cve_datas.into_iter() {
let id = cve_metadata.get_cve_id().to_string();
......
[package]
name = "hyperast_query"
version = "0.1.0"
edition = "2021"
[dependencies]
rust_utils = { path = "../rust_utils" }
pull_request_sanitizer = { path = "../pull_request_sanitizer" }
lazy_static = {workspace = true}
log = {workspace = true}
dotenv = {workspace = true}
clap = {workspace = true}
serde = {workspace = true}
serde_derive = {workspace = true}
serde_json = {workspace = true}
\ No newline at end of file
use log::info;
use pull_request_sanitizer::load_sanitized_output_cve_data;
use pull_request_sanitizer::pipeline::SanitizedOutputCveData;
use rust_utils::logger::common_logger::init_logger;
use crate::params::argv::get_program_args;
mod params;
fn main() -> Result<(), Box<dyn std::error::Error>> {
dotenv::dotenv().ok();
init_logger();
let argv = get_program_args();
info!("🚀 Start testing the hyperast query.");
let cve_datas : Vec<SanitizedOutputCveData> = load_sanitized_output_cve_data(&argv.dataset_path)?;
for cve_data in cve_datas {
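// the per-CVE query logic is not implemented yet: this commit only prepares the binary and its inputs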
}
Ok(())
}
\ No newline at end of file
use clap::Parser;
/// Benchmark the query on the hyper ast
#[derive(Parser, Debug)]
#[command(author, version, about, long_about = None)]
pub struct Argv {
/// the dataset path to use
#[arg(short, long)]
pub dataset_path : String,
}
pub fn get_program_args() -> Argv {
return Argv::parse();
}
\ No newline at end of file
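Usage note (invocation inferred from the clap derive above, not part of the commit): the dataset_path field maps to a --dataset-path (short -d) argument, so the new binary would presumably be launched as cargo run -p hyperast_query -- --dataset-path <cve_dataset_dir>.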
pub mod argv;
use std::fs;
use std::path::Path;
use pipeline::SanitizedOutputCveData;
mod params;
pub mod pipeline;
mod errors;
mod utils;
mod repo;
\ No newline at end of file
mod repo;
pub fn load_sanitized_output_cve_data<P : AsRef<Path>>(cve_dir_path : P) -> Result<Vec<SanitizedOutputCveData>, Box<dyn std::error::Error>> {
let entries = fs::read_dir(&cve_dir_path).unwrap();
let mut cve_datas : Vec<SanitizedOutputCveData> = Vec::new();
for entry in entries {
let path = match entry {
Ok(entry) => entry.path(),
Err(_) => continue, // Skip to next iteration if entry is an error
};
if !path.is_dir() {
continue; // Skip to next iteration if path is not a directory
}
let data_path = path.join("data.json");
if !data_path.exists() {
continue; // Skip to next iteration if data.json does not exist
}
let contents = fs::read_to_string(&data_path)?;
let data = match serde_json::from_str(&contents) {
Ok(data) => data,
Err(_) => panic!("Failed to parse data.json"),
};
cve_datas.push(data);
}
cve_datas.sort_by(|a, b| a.get_cve_id().cmp(&b.get_cve_id()));
Ok(cve_datas)
}
\ No newline at end of file
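As a side note, a minimal sketch of a stricter variant of the loader above (not the committed code; the function name load_sanitized_output_cve_data_strict, the crate-local import path and the blanket use of ? are assumptions): it performs the same scan of per-CVE sub-directories and the same data.json parse, but propagates I/O and JSON errors instead of calling unwrap() and panic!().

use std::fs;
use std::path::Path;
use crate::pipeline::SanitizedOutputCveData; // assumes the sketch lives in the same crate

pub fn load_sanitized_output_cve_data_strict<P: AsRef<Path>>(
    cve_dir_path: P,
) -> Result<Vec<SanitizedOutputCveData>, Box<dyn std::error::Error>> {
    let mut cve_datas: Vec<SanitizedOutputCveData> = Vec::new();
    for entry in fs::read_dir(&cve_dir_path)? {
        let path = entry?.path(); // propagate unreadable entries instead of silently skipping them
        let data_path = path.join("data.json");
        if !path.is_dir() || !data_path.exists() {
            continue; // keep only CVE sub-directories that actually contain a data.json
        }
        let contents = fs::read_to_string(&data_path)?;
        cve_datas.push(serde_json::from_str(&contents)?); // parse errors bubble up as Box<dyn Error>
    }
    cve_datas.sort_by(|a, b| a.get_cve_id().cmp(&b.get_cve_id()));
    Ok(cve_datas)
}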
@@ -43,7 +43,7 @@ fn main() {
continue;
}
match pipeline::sanitize_cve(&path) {
match pipeline::sanitize_cve(&path, argv.check_in_repo) {
Ok((result, patch_data)) => {
fs::create_dir_all(&output_cve_folder).unwrap();
let output_file = output_cve_folder.join("data.json");
......
@@ -15,6 +15,11 @@ pub struct Argv {
/// delete the output folder if it already exists
#[arg(long, action)]
pub delete_output_folder : bool,
/// check also in the repo itself (using git2, take a lot of time and space)
#[arg(long, action)]
pub check_in_repo : bool,
}
......
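Usage note (inferred from the clap derive, not shown in the commit): the new check_in_repo field surfaces as an opt-in boolean --check-in-repo flag, so the slow git2-based verification only runs when the sanitizer is invoked with it, e.g. cargo run -p pull_request_sanitizer -- --check-in-repo alongside its existing arguments.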
@@ -55,7 +55,7 @@ impl SanitizedOutputCveData {
/// Sanitize a cve directory (return the sanitized data and the patch data)
pub(crate) fn sanitize_cve<P : AsRef<Path>>(cve_dir_path : P) -> Result<(SanitizedOutputCveData, String), CveSanitizerError> {
pub(crate) fn sanitize_cve<P : AsRef<Path>>(cve_dir_path : P, check_in_repo : bool) -> Result<(SanitizedOutputCveData, String), CveSanitizerError> {
let dir_path = cve_dir_path.as_ref();
@@ -86,11 +86,17 @@ pub(crate) fn sanitize_cve<P : AsRef<Path>>(cve_dir_path : P) -> Result<(Sanitiz
repo_type,
language,
warnings
) = get_and_and_check_repo_name(&pull_request_data, merge_commit_sha.as_str())?;
) = get_and_and_check_repo_name(&pull_request_data, merge_commit_sha.as_str(), check_in_repo)?;
// prepare the new commit in the repo
let repo = load_repo_from_name(&repo_name);
let oid = apply_patch_and_retrieve_commit(&repo, merge_commit_sha.as_str(), patch_data.as_str())?;
let merged_commit_sha = if check_in_repo {
let repo = load_repo_from_name(&repo_name);
let oid = apply_patch_and_retrieve_commit(&repo, merge_commit_sha.as_str(), patch_data.as_str())?;
oid.to_string()
} else {
String::new()
};
@@ -103,7 +109,7 @@ pub(crate) fn sanitize_cve<P : AsRef<Path>>(cve_dir_path : P) -> Result<(Sanitiz
language,
warnings,
commit_sha : merge_commit_sha.to_string(),
reconstructed_merged_commit_sha : oid.to_string()
reconstructed_merged_commit_sha : merged_commit_sha,
};
......
@@ -76,7 +76,8 @@ impl RepoType {
&self,
pull_request_data : &serde_json::Value,
commit_to_test : &str,
_main_url : &str
_main_url : &str,
check_in_repo : bool,
) -> Result<(String, Option<Language>), CveSanitizerApiWarning> {
// determine the repo name
let repo_name = pull_request_data.get_json_value_from_path(
@@ -109,9 +110,10 @@ impl RepoType {
}
// test if the commit is in the cloned repo
let repo = load_repo_from_name(repo_name);
let _commit = retrieve_commit(&repo, commit_to_test).map_err(|_| self.get_merge_commit_not_found_in_cloned_repo_warning())?;
if check_in_repo {
let repo = load_repo_from_name(repo_name);
let _commit = retrieve_commit(&repo, commit_to_test).map_err(|_| self.get_merge_commit_not_found_in_cloned_repo_warning())?;
}
Ok((repo_name.to_string(), Language::new_from_github_language(language)))
}
}
@@ -120,7 +122,8 @@ impl RepoType {
/// if the commit is in the 2 repos, it will return the head repo
pub fn get_and_and_check_repo_name(
pull_request_data : &serde_json::Value,
commit_to_test : &str
commit_to_test : &str,
check_in_repo : bool,
) -> Result<(String, RepoType, Option<Language>, Vec<CveSanitizerApiWarning>), CveSanitizerError> {
let mut warnings = vec![];
@@ -133,11 +136,11 @@ pub fn get_and_and_check_repo_name(
// ----------- head repo ------------
let head_repo = RepoType::Head.test_repo(pull_request_data, commit_to_test, main_url);
let head_repo = RepoType::Head.test_repo(pull_request_data, commit_to_test, main_url, check_in_repo);
// ----------- base repo ------------
let base_repo = RepoType::Base.test_repo(pull_request_data, commit_to_test, main_url);
let base_repo = RepoType::Base.test_repo(pull_request_data, commit_to_test, main_url, check_in_repo);
// collect the results
if head_repo.is_err() {
......
@@ -23,7 +23,9 @@ pub fn apply_patch_and_retrieve_commit<'a>(
repository.set_index(&mut new_index)?; // set the new index
let new_tree = new_index.write_tree()?; // write the new tree
let new_tree_oid = new_index.write_tree()?; // write the new tree
let new_tree = repository.find_tree(new_tree_oid)?; // find the new tree
let signature = Signature::now("sanitized cve automatique", "clement.lahoche@inria.fr")?; // create a signature
......
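For context on the fix above, a minimal standalone sketch of the git2 pattern involved (the function name commit_index, the parent_oid parameter and the commit message are illustrative assumptions, not the project's code): Index::write_tree() only returns an Oid, so it has to be resolved with Repository::find_tree() before it can be handed to Repository::commit().

use git2::{Oid, Repository, Signature};

fn commit_index(repository: &Repository, parent_oid: Oid) -> Result<Oid, git2::Error> {
    let mut index = repository.index()?;                 // the repository's current index
    let new_tree_oid = index.write_tree()?;              // write_tree() yields only an Oid
    let new_tree = repository.find_tree(new_tree_oid)?;  // resolve the Oid into a Tree value
    let signature = Signature::now("example author", "author@example.com")?; // placeholder identity
    let parent = repository.find_commit(parent_oid)?;    // parent of the commit being created
    repository.commit(None, &signature, &signature, "apply patch", &new_tree, &[&parent])
}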