Commit 7168f0fb authored by NOEL Philippe's avatar NOEL Philippe

Change the fasta func from prot to tools

parent 45b7ec13
//! # pdbparser
//!
//!
//! `pdbparser` is a library to manipulate protein structures. It can parse and filter PDB files.
//! You can create a protein structure by parsing a file with the `parse_pdb` function, then apply filters to your protein,
//! and save it with the `write_pdb` function.
//!
//!
mod pdb;
pub use self::pdb::read_pdb::parse_pdb as parse_pdb;
pub use self::pdb::read_pdb::parse_pdb;
pub use self::pdb::write_pdb::write_pdb;
pub use self::pdb::tools::*;
pub use self::pdb::protein::Protein;
pub use self::pdb::atom::Atom;
pub use self::pdb::residue::Residue;
pub use self::pdb::chain::Chain;
pub use self::pdb::protein::Protein;
pub use self::pdb::residue::Residue;
......@@ -4,19 +4,19 @@ use pdbparser::*;
fn main() {
let my_prot = parse_pdb("tests/tests_file/f2.pdb", "5jpq");
match write_pdb(&my_prot, "toto.pdb"){
match write_pdb(&my_prot, "toto.pdb") {
Ok(_) => (),
Err(e) => println!("Error : {}", e),
};
use pdbparser;
let my_prot = pdbparser::parse_pdb("tests/tests_file/f2.pdb", "f2");
let mut backbone = my_prot.select_atoms("backbone").unwrap();
let lst_atom_id = backbone.get_atom_index();
assert_eq!(1, lst_atom_id[0]);
assert_eq!(5, lst_atom_id[1]);
backbone.refine_dialing();
let lst_atom_id = backbone.get_atom_index();
assert_eq!(1, lst_atom_id[0]);
......@@ -26,7 +26,4 @@ fn main() {
// let chain_a = my_prot.select_atoms("chain a").unwrap();
// println!("Prot : {} \nn chain: {}\nn res: {}\nn atom: {}", chain_a.name, chain_a.get_number_chain(), chain_a.get_number_residue(), chain_a.get_number_atom());
}
......@@ -6,7 +6,7 @@ use std::ops::Deref;
/// - atom number (atomid);
/// - Coordinates x, y and z
/// - if the atom is a constituent of the backbone of the protein
///
///
#[derive(Debug)]
pub struct Atom {
pub name: String,
......@@ -20,16 +20,16 @@ impl Atom {
/// If the atom name is "C", "CA", "N", "O", "OT1" or "OT2", it will be considered as backbone
///
/// # Examples
///
///
/// ````
/// use pdbparser;
///
///
/// let hydrogen = pdbparser::Atom::new(String::from("HT1"), 1, [0.0, 0.0, 0.0]);
///
///
/// ````
pub fn new(name: String, number: u64, coord: [f32; 3]) -> Atom {
let n = name.deref();
let back = n == "C" || n == "CA" || n == "N" || n == "O" || n == "OT1" || n == "OT2";
let back = n == "C" || n == "CA" || n == "N" || n == "O" || n == "OT1" || n == "OT2";
Atom {
name,
number,
......@@ -39,29 +39,28 @@ impl Atom {
}
/// Get the name of the atom
///
pub fn name(&self) -> String{
///
pub fn name(&self) -> String {
self.name.clone()
}
/// Compute the Euclidean distance between this atom and `a`.
///
/// The result is the square root of the sum of the squared differences of the
/// three `coord` components, accumulated in x, y, z order.
///
/// # Examples
///
/// ````
/// use pdbparser;
///
/// let h1 = pdbparser::Atom::new(String::from("HT1"), 1, [1.0, 5.0, 2.0]);
/// let h2 = pdbparser::Atom::new(String::from("HT1"), 1, [11.0, 17.0, 5.0]);
///
/// assert_eq!(15.905973, h1.compute_distance(&h2));
/// ````
pub fn compute_distance(&self, a: &Atom) -> f32 {
    self.coord
        .iter()
        .zip(a.coord.iter())
        // Squared difference per axis; summed in the same x, y, z order as the
        // explicit three-term expression so the f32 result is unchanged.
        .map(|(p, q)| (p - q).powi(2))
        .sum::<f32>()
        .sqrt()
}
}
use super::residue::Residue;
/// A `Chain` is a sub-structure linked to a `Protein`.
/// It contains one or more `Residue`s and a name
///
///
#[derive(Debug)]
pub struct Chain {
pub name: char,
......@@ -14,12 +13,12 @@ impl Chain {
/// Create a new chain structure with an empty list of residue
///
/// # Examples
///
///
/// ````
/// use pdbparser;
///
///
/// let my_chain = pdbparser::Chain::new('a');
///
///
/// ````
pub fn new(name: char) -> Chain {
Chain {
......@@ -31,17 +30,17 @@ impl Chain {
/// Add a new structure residue to the Chain
///
/// # Examples
///
///
/// ````
/// use pdbparser;
///
///
/// let mut my_chain = pdbparser::Chain::new('a');
/// let lys = pdbparser::Residue::new(String::from("lysine"), 1);
///
///
/// my_chain.add_res(lys);
///
///
/// assert_eq!(1, my_chain.get_number_residue());
///
///
/// ````
pub fn add_res(&mut self, r: Residue) {
self.lst_res.push(r);
......@@ -50,14 +49,14 @@ impl Chain {
/// Get the number of residue in the Chain
///
/// # Examples
///
///
/// ````
/// use pdbparser;
///
///
/// let my_chain = pdbparser::Chain::new('a');
///
///
/// assert_eq!(0, my_chain.get_number_residue());
///
///
/// ````
pub fn get_number_residue(&self) -> u64 {
self.lst_res.len() as u64
......@@ -67,26 +66,26 @@ impl Chain {
/// residue does not exist
///
/// # Examples
///
///
/// ````
/// use pdbparser;
///
///
/// let mut my_chain = pdbparser::Chain::new('a');
/// let lys = pdbparser::Residue::new(String::from("lysine"), 1);
/// my_chain.add_res(lys);
///
///
/// assert_eq!(1, my_chain.lst_res[0].res_num);
/// {
/// let mut res_ref = my_chain.get_residue_ref(1).unwrap();
/// res_ref.res_num = 4;
/// }
/// assert_eq!(4, my_chain.lst_res[0].res_num);
///
///
/// ````
pub fn get_residue_ref(&mut self, n: u64) -> Option<&mut Residue> {
for res in &mut self.lst_res {
if res.res_num == n {
return Some(res)
return Some(res);
}
}
None
......@@ -96,4 +95,4 @@ impl Chain {
pub fn get_name(&self) -> char {
self.name
}
}
\ No newline at end of file
}
pub mod atom;
pub mod chain;
pub mod protein;
pub mod residue;
pub mod read_pdb;
pub mod chain;
pub mod residue;
pub mod write_pdb;
pub mod tools;
mod selection_atom;
\ No newline at end of file
mod selection_atom;
This diff is collapsed.
......@@ -5,7 +5,6 @@ use std::process;
use super::protein::Protein;
/// Parse the string to return an f32. The `trim` is used to remove
/// `\n` and spaces.
///
......@@ -24,7 +23,7 @@ fn parse_float(s: &str) -> f32 {
/// Parse the string to return an i64. The `trim` is used to remove
/// `\n` and spaces.
/// In large PDB, atom number can be > 99,999.
/// In large PDB, atom number can be > 99,999.
/// In VMD, the atom number is in hexadecimal after 99,999
///
/// # Errors
......@@ -33,20 +32,18 @@ fn parse_float(s: &str) -> f32 {
fn parse_int(s: &str) -> i64 {
    // Trim once; both parse attempts work on the same cleaned-up slice.
    let trimmed = s.trim();
    match trimmed.parse::<i64>() {
        Ok(n) => n,
        // Not a decimal number: retry as hexadecimal before giving up.
        Err(e) => match i64::from_str_radix(trimmed, 16) {
            Ok(n) => n,
            Err(_) => {
                // Neither decimal nor hexadecimal: report the decimal parse
                // error and fall back to 0.
                println!("{}", e);
                0
            }
        },
    }
}
/// Parse the pdb file and return a protein structure
///
///
/// # Examples
/// ```
/// use pdbparser;
......@@ -54,13 +51,10 @@ fn parse_int(s: &str) -> i64 {
/// assert_eq!(66, my_prot.get_number_residue());
/// ```
pub fn parse_pdb(pdb: &str, name: &str) -> Protein {
// Allocate here to avoid multiple allocation for every call
// Allocate here to avoid multiple allocation for every call
let lst_res = vec![
"ARG", "HIS", "HSE", "HSD", "LYS", "LYS", "ASP", "GLU", "SER", "THR", "ASN",
"GLN", "CYS", "SEC", "GLY", "PRO", "ALA", "VAL", "ILE", "LEU",
"MET", "PHE", "TYR", "TRP"
"ARG", "HIS", "HSE", "HSD", "LYS", "LYS", "ASP", "GLU", "SER", "THR", "ASN", "GLN", "CYS",
"SEC", "GLY", "PRO", "ALA", "VAL", "ILE", "LEU", "MET", "PHE", "TYR", "TRP",
];
// Check if the file exist and/or can be read
......@@ -74,16 +68,15 @@ pub fn parse_pdb(pdb: &str, name: &str) -> Protein {
let reader = BufReader::new(pdb);
let mut protein = Protein::new(name.to_string());
for line in reader.lines() {
let l = line.unwrap();
if l.starts_with("HETAM") || l.starts_with("ATOM") {
// First get the resname.
// First get the resname.
// If the "residue" is a protein residue, continue to parse the line and add the information to the protein;
// otherwise skip to the next line
let residue_name = &l[17..20].trim();
if is_protein_res(residue_name, &lst_res) {
let atom_name = &l[12..17].trim().to_string();
let chain = l[21..22].chars().next().unwrap();
let atom_number = parse_int(&l[6..11].to_string());
......@@ -92,22 +85,28 @@ pub fn parse_pdb(pdb: &str, name: &str) -> Protein {
let y = parse_float(&l[38..46].to_string());
let z = parse_float(&l[46..54].to_string());
// Add informations to the protein
protein.update_protein(chain, residue_name.to_string(), residue_number as u64, atom_name.clone(), atom_number as u64, [x, y, z]);
protein.update_protein(
chain,
residue_name.to_string(),
residue_number as u64,
atom_name.clone(),
atom_number as u64,
[x, y, z],
);
}
}
}
protein
}
/// Test whether a residue name belongs to the list of known protein residues.
///
/// `r` is upper-cased before the comparison, so the match is case-insensitive
/// as long as the entries of `lst` are themselves upper-case. Returns `false`
/// for an empty list.
fn is_protein_res(r: &str, lst: &[&str]) -> bool {
    let r = r.to_uppercase();
    lst.iter().any(|res| r == *res)
}
......@@ -6,11 +6,11 @@ use super::atom::Atom;
/// - res name;
/// - res number (resid);
/// - a list of atom(s)
///
///
#[derive(Debug)]
pub struct Residue {
pub name: String,
pub res_num: u64,
pub res_num: u64,
pub lst_atom: Vec<Atom>,
}
......@@ -19,29 +19,29 @@ impl Residue {
/// The Residue have a name and a number
///
/// # Examples
///
///
/// ````
/// use pdbparser;
///
///
/// let lys = pdbparser::Residue::new(String::from("lysine"), 1);
///
///
/// ````
pub fn new(name: String, res_num: u64) -> Residue {
    // A freshly created residue owns no atoms yet; `lst_atom` starts empty.
    Residue {
        name,
        res_num,
        lst_atom: Vec::new(),
    }
}
/// Get the name of the residue.
///
/// Returns a clone of the `name` field, so the caller receives an owned `String`.
pub fn name(&self) -> String {
self.name.clone()
}
/// Get the residue number (resid) of the residue.
///
/// Returns the `res_num` field by value.
pub fn get_res_num(&self) -> u64 {
self.res_num
}
......@@ -49,13 +49,13 @@ impl Residue {
/// Get the number of Atom in the Residue
///
/// # Examples
///
///
/// ````
/// use pdbparser;
///
///
/// let lys = pdbparser::Residue::new(String::from("lysine"), 1);
/// assert_eq!(0, lys.get_number_atom());
///
///
/// ````
pub fn get_number_atom(&self) -> u64 {
self.lst_atom.len() as u64
......@@ -64,20 +64,19 @@ impl Residue {
/// Add an `Atom` structure to the Residue.
///
/// The atom is moved into the residue and appended at the end of `lst_atom`.
///
/// # Examples
///
/// ````
/// use pdbparser;
///
/// let mut lys = pdbparser::Residue::new(String::from("lysine"), 1);
/// let hydrogen = pdbparser::Atom::new(String::from("HT1"), 1, [0.0, 0.0, 0.0]);
///
/// lys.add_atom(hydrogen);
///
/// assert_eq!(1, lys.get_number_atom());
/// ````
pub fn add_atom(&mut self, a: Atom) {
self.lst_atom.push(a);
}
}
\ No newline at end of file
}
......@@ -12,7 +12,7 @@ pub enum Select {
/// Parse option given for the selection of residues and return
/// the index of begin and end of select residues.
///
///
fn parse_options_int(opt: &[&str]) -> Option<[usize; 2]> {
// A length of 1 means that the request looks like "Resid 1";
// in that case return Some([1, 1])
......@@ -31,15 +31,15 @@ fn parse_options_int(opt: &[&str]) -> Option<[usize; 2]> {
Ok(v) => v,
Err(_) => return None,
};
return Some([index1, index2])
return Some([index1, index2]);
}
None
}
/// Parse option given for the selection of atoms
///
///
/// TODO: Remove this one because it duplicates parse_options_int
///
///
fn parse_options_char(opt: &[&str]) -> Option<[char; 2]> {
// A length of 1 means that the request looks like "Chain A";
// in that case return Some(['A', 'A'])
......@@ -58,7 +58,7 @@ fn parse_options_char(opt: &[&str]) -> Option<[char; 2]> {
Ok(v) => v,
Err(_) => return None,
};
return Some([index1, index2])
return Some([index1, index2]);
}
None
}
......@@ -74,38 +74,32 @@ pub fn parse_select(pattern: &str) -> Option<Vec<Select>> {
}
let mut lst_selection: Vec<Select> = Vec::new(); //vector to return containing sub_patterns
let sel: Vec<&str> = pattern.split("and").collect();
let sel: Vec<&str> = pattern.split("and").collect();
for pattern in sel {
let sub_pattern: Vec<&str> = pattern.split_whitespace().collect();
match sub_pattern[0].to_lowercase().as_str() {
"resid" => {
match parse_options_int(&sub_pattern[1..]) {
Some(x) => lst_selection.push(Select::Resid(x[0], x[1])),
None => return None,
}
}
"chain" => {
match parse_options_char(&sub_pattern[1..]) {
Some(x) => lst_selection.push(Select::Chain(x[0], x[1])),
None => return None,
}
}
"resid" => match parse_options_int(&sub_pattern[1..]) {
Some(x) => lst_selection.push(Select::Resid(x[0], x[1])),
None => return None,
},
"chain" => match parse_options_char(&sub_pattern[1..]) {
Some(x) => lst_selection.push(Select::Chain(x[0], x[1])),
None => return None,
},
"backbone" => lst_selection.push(Select::Backbone),
_ => {
println!("Error in the selection");
return None
},
return None;
}
}
}
Some(lst_selection)
}
pub fn atom_match(sel: &Vec<Select>, chain: char, res_id: u64, is_back: bool) -> bool {
// For each pattern in sel, the pattern is compared to the characteristics of the atom.
// If at any moment the characteristics do not match, the function returns false.
// In the end, it returns true (everything is considered OK).
......@@ -113,20 +107,20 @@ pub fn atom_match(sel: &Vec<Select>, chain: char, res_id: u64, is_back: bool) ->
for pattern in sel {
match pattern {
Select::Chain(x, y) => {
if ! (chain >= *x && chain <= *y) {
return false
if !(chain >= *x && chain <= *y) {
return false;
}
},
}
Select::Backbone => {
if ! (is_back) {
return false
if !(is_back) {
return false;
}
},
}
Select::Resid(x, y) => {
if ! (res_id as usize >= *x && res_id as usize <= *y) {
return false
if !(res_id as usize >= *x && res_id as usize <= *y) {
return false;
}
},
}
}
}
true
......@@ -145,7 +139,6 @@ fn test_atom_search() {
s.push(Select::Backbone);
assert!(atom_match(&s, 'C', 15, true));
assert!(!atom_match(&s, 'C', 15, false));
}
#[test]
......@@ -154,7 +147,7 @@ fn test_parse_select() {
let v = parse_select("resid 5 to 96").unwrap();
println!("{:?}", v);
assert_eq!(Select::Resid(5, 96), v[0]);
let v = parse_select("resid 5 to 96 and chain B").unwrap();
assert_eq!(Select::Resid(5, 96), v[0]);
assert_eq!(Select::Chain('B', 'B'), v[1]);
......
use std::collections::HashMap;
use super::protein::Protein;
/// Convert the protein to a FASTA sequence (one character per residue).
///
/// Chains are walked in the order of `lst_chain` and residues in the order of each
/// chain's `lst_res`. Every residue name is looked up, as stored (case-sensitive), in a
/// three-letter to one-letter table; residues whose name is not in the table are
/// skipped silently.
///
/// Consult the corresponding table for the one-letter <-> three-letter codes:
/// [Wikipedia amino acid](https://en.wikipedia.org/wiki/Amino_acid).
/// `HIS`, `HSD`, `HSE` and `HSP` are all emitted as `H`.
///
/// NOTE(review): the expected sequence in the example was recorded while `PHE` was
/// emitted as `P`; re-run the doctest and update the string if the test file
/// contains phenylalanine residues.
///
/// # Examples
/// ```
/// use pdbparser;
///
/// let my_prot = pdbparser::parse_pdb("tests/tests_file/f2.pdb", "f2");
/// assert_eq!("TSPQPYSIERTIRWLTYQVANSLALVSEADKIMQTEYMKMIQNSGEITDRGEAILRLLKTNKHYEH", pdbparser::fasta_seq(my_prot));
/// ```
pub fn fasta_seq(my_prot: Protein) -> String {
    let res: HashMap<&str, char> = [
        ("ARG", 'R'),
        ("LYS", 'K'),
        ("ASN", 'N'),
        ("ASP", 'D'),
        ("GLU", 'E'),
        ("SER", 'S'),
        ("THR", 'T'),
        ("GLN", 'Q'),
        ("CYS", 'C'),
        ("HIS", 'H'),
        ("HSD", 'H'),
        ("HSP", 'H'),
        // Was a second "HSD" entry; HSE is accepted by the parser's residue list
        // and must map to histidine as well.
        ("HSE", 'H'),
        ("SEC", 'U'),
        ("GLY", 'G'),
        ("PRO", 'P'),
        ("ALA", 'A'),
        ("VAL", 'V'),
        ("ILE", 'I'),
        ("LEU", 'L'),
        ("MET", 'M'),
        // Phenylalanine is 'F'; 'P' is proline.
        ("PHE", 'F'),
        ("TYR", 'Y'),
        ("TRP", 'W'),
    ]
    .iter()
    .cloned()
    .collect();

    // At most one character per residue, so this capacity is never exceeded.
    let mut fasta = String::with_capacity(my_prot.get_number_residue() as usize);
    for chain in &my_prot.lst_chain {
        for residue in &chain.lst_res {
            // Borrow the name for the lookup instead of cloning it per residue.
            if let Some(code) = res.get(residue.name.as_str()) {
                fasta.push(*code);
            }
        }
    }
    fasta
}
\ No newline at end of file
use super::protein::Protein;
use std::fs::File;
use std::io::Write;
use std::io;
use std::io::Write;
/// Write a PDB file for the `Protein`.
/// Be careful: the protein is written with the atom numbers stored in its structure. Remember to call the method
/// my_protein.refine_dialing() beforehand!
pub fn write_pdb(my_prot: &Protein, file: &str) -> io::Result<()>{
pub fn write_pdb(my_prot: &Protein, file: &str) -> io::Result<()> {
let mut output_pdb = File::create(file)?;
for chain in &my_prot.lst_chain {
......@@ -21,19 +19,21 @@ pub fn write_pdb(my_prot: &Protein, file: &str) -> io::Result<()>{
let atom_name = atom.name();
let atom_id = atom.number;
let atom_coord = atom.coord;
output_pdb.write_fmt(format_args!("ATOM {:>5} {:<4}{:>3} {}{:>4} {:>8.3}{:>8.3}{:>8.3}{:>6.2}{:>6.2}\n",
atom_id,
atom_name,
res_name,
chain_name,
res_id,
atom_coord[0],
atom_coord[1],
atom_coord[2],
1.0, 0.0
output_pdb.write_fmt(format_args!(
"ATOM {:>5} {:<4}{:>3} {}{:>4} {:>8.3}{:>8.3}{:>8.3}{:>6.2}{:>6.2}\n",
atom_id,
atom_name,
res_name,
chain_name,
res_id,
atom_coord[0],
atom_coord[1],
atom_coord[2],
1.0,
0.0
))?;
}
}
}
Ok(())
}
\ No newline at end of file
}
......@@ -44,6 +44,8 @@ fn f2_res() {
let res = chain_a.get_residue_ref(2).unwrap();
assert_eq!("SER", res.name());
let res = chain_a.get_residue_ref(chain_a.get_number_residue()).unwrap();
let res = chain_a
.get_residue_ref(chain_a.get_number_residue())
.unwrap();
assert_eq!("HSD", res.name());
}
\ No newline at end of file
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment