diff --git a/Cargo.lock b/Cargo.lock index b3383df688dddd4b535d9ac9c1b44c430ba66984..8aa66181a6ace2d118ed659a0cc6bbf23b4187da 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1991,7 +1991,6 @@ version = "0.1.0" dependencies = [ "clap 4.5.4", "git2", - "hyper_ast_cvs_git", "log", "log4rs", "serde", diff --git a/pull_request_sanitizer/src/pipeline.rs b/pull_request_sanitizer/src/pipeline.rs index db7b4129e9d0c59f705074fa68f98be909a04942..a37099ad41b7d52c77cf0248848cc81c221d0d16 100644 --- a/pull_request_sanitizer/src/pipeline.rs +++ b/pull_request_sanitizer/src/pipeline.rs @@ -3,6 +3,8 @@ use std::path::Path; use cve_dataset::language::Language; use github_requester::get_string_from_url; +use log::error; +use rust_utils::git_utils::{apply_patch_and_retrieve_commit, load_repo_from_name, retrieve_commit}; use serde_derive::{Deserialize, Serialize}; use crate::errors::data_errors::CveSanitizerDataError; @@ -19,6 +21,7 @@ pub struct SanitizedOutputCveData { language : Option<Language>, warnings : Vec<CveSanitizerApiWarning>, commit_sha : String, + reconstructed_merged_commit_sha : String, } impl Display for SanitizedOutputCveData { @@ -83,7 +86,12 @@ pub(crate) fn sanitize_cve<P : AsRef<Path>>(cve_dir_path : P) -> Result<(Sanitiz repo_type, language, warnings - ) = get_and_and_check_repo_name(&pull_request_data, merge_commit_sha.as_str())?; + ) = get_and_and_check_repo_name(&pull_request_data, merge_commit_sha.as_str())?; + + // prepare the new commit in the repo + let repo = load_repo_from_name(&repo_name); + let oid = apply_patch_and_retrieve_commit(&repo, merge_commit_sha.as_str(), patch_data.as_str())?; + @@ -95,6 +103,7 @@ pub(crate) fn sanitize_cve<P : AsRef<Path>>(cve_dir_path : P) -> Result<(Sanitiz language, warnings, commit_sha : merge_commit_sha.to_string(), + reconstructed_merged_commit_sha : oid.to_string() }; diff --git a/rust_utils/Cargo.toml b/rust_utils/Cargo.toml index b78c79f1f6c310c1e16167a8c569424c956f7500..b6548bafa455929f9cb43a2f4c8421280b53f883 100644 --- a/rust_utils/Cargo.toml +++ b/rust_utils/Cargo.toml @@ -6,7 +6,6 @@ edition = "2021" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [dependencies] -hyper_ast_cvs_git = { workspace = true} git2 = { workspace = true} serde = {workspace = true} diff --git a/rust_utils/src/git_utils/fetching.rs b/rust_utils/src/git_utils/fetching.rs new file mode 100644 index 0000000000000000000000000000000000000000..ea237bd7df6814adecccb5d780e89efafdfdc270 --- /dev/null +++ b/rust_utils/src/git_utils/fetching.rs @@ -0,0 +1,91 @@ +use std::fmt::Display; +use std::path::{Path, PathBuf}; + +use git2::{RemoteCallbacks, Repository}; + +pub struct Url { + protocol: String, + domain: String, + path: String, +} + +impl TryFrom<String> for Url { + type Error = (); + + fn try_from(s: String) -> Result<Self, Self::Error> { + let (protocol, rest) = match s.split_once("://") { + Some((protocol, rest)) => (protocol, rest), + None => ("https", s.as_ref()), + }; + + let (domain, path) = rest.split_once("/").ok_or(())?; + + Ok(Self { + protocol: protocol.to_string(), + domain: domain.to_string(), + path: path.to_string(), + }) + } +} + +impl Display for Url { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "{}://{}/{}", self.protocol, self.domain, self.path) + } +} + +pub fn fetch_repository<'a, T: TryInto<Url>, U: Into<PathBuf>>(url: T, path: U, bare : bool) -> Repository +where + <T as TryInto<Url>>::Error: std::fmt::Debug, +{ + let url: Url = url.try_into().unwrap(); + let mut path: PathBuf = path.into(); + path.push(url.path.clone()); + // let url = &format!("{}{}", "https://github.com/", repo_name); + // let path = &format!("{}{}", "/tmp/hyperastgitresources/repo/", repo_name); + let mut callbacks = RemoteCallbacks::new(); + + callbacks.transfer_progress(|x| { + log::debug!("transfer {}/{}", x.received_objects(), x.total_objects()); + true + }); + + let mut fo = git2::FetchOptions::new(); + + fo.remote_callbacks(callbacks); + + let repository = up_to_date_repo(&path, fo, url, bare); + repository +} + +fn up_to_date_repo(path: &Path, mut fo: git2::FetchOptions, url: Url, bare : bool) -> Repository { + if path.join(".git").exists() { + let repository = match Repository::open(path) { + Ok(repo) => repo, + Err(e) => panic!("failed to open: {}", e), + }; + log::info!("fetch: {:?}", path); + repository + .find_remote("origin") + .unwrap() + .fetch(&["main"], Some(&mut fo), None) + .unwrap_or_else(|e| log::error!("{}", e)); + + repository + } else if path.exists() { + todo!() + } else { + let mut builder = git2::build::RepoBuilder::new(); + + builder.bare(bare); + + builder.fetch_options(fo); + + log::info!("clone {} in {:?}", url, path); + let repository = match builder.clone(&url.to_string(), path.join(".git").as_path()) { + Ok(repo) => repo, + Err(e) => panic!("failed to clone: {}", e), + }; + repository + } +} diff --git a/rust_utils/src/git_utils/mod.rs b/rust_utils/src/git_utils/mod.rs index 88dd3dcc508cbe584af63600031d0693abf7d430..731d9e571f3daba136c15167daf82143860cb879 100644 --- a/rust_utils/src/git_utils/mod.rs +++ b/rust_utils/src/git_utils/mod.rs @@ -1,11 +1,45 @@ use std::path::Path; -use git2::{Oid, Repository, Revwalk, Sort}; -use hyper_ast_cvs_git::git::fetch_repository; +use git2::{Diff, Oid, Repository, Revwalk, Signature, Sort}; use log::info; +use crate::git_utils::fetching::fetch_repository; use crate::params::TMP_REPO_FOLDER; +mod fetching; + +pub fn apply_patch_and_retrieve_commit<'a>( + repository: &'a Repository, + starting_commit_sha: &str, + patch_content: &str, +) -> Result<Oid, git2::Error> { + let init_commit = retrieve_commit(repository, starting_commit_sha)?; + let init_tree = init_commit.tree()?; + repository.checkout_tree(init_tree.as_object(), None)?; // reset the working directory to the commit + + let diff = Diff::from_buffer(patch_content.as_bytes())?; // load the diff + + let mut new_index = repository.apply_to_tree(&init_tree, &diff, None)?; // apply the diff to the tree + + repository.set_index(&mut new_index)?; // set the new index + + let new_tree = new_index.write_tree()?; // write the new tree + + let signature = Signature::now("sanitized cve automatique", "clement.lahoche@inria.fr")?; // create a signature + + + repository.commit( // commit the new tree + None, + &signature, + &signature, + "sanitized cve", + &new_tree, + &[&init_commit], + ) +} + + + pub fn get_parent_commit<'a>( repository: &'a Repository, commit: &str, @@ -60,7 +94,7 @@ pub fn load_repo_from_name(repo_name : &str) -> Repository { let repo_path = Path::new(TMP_REPO_FOLDER).join(repo_name); let url = format!("{}{}", "https://github.com/", repo_name); info!("fetching repo {}", url); - fetch_repository(url, &repo_path) + fetch_repository(url, &repo_path, false) } /// Extract the repo name from a pull request url (https://api.github.com/repos/OWNER/REPO/pulls/NUMBER)