Commit 37445a78 authored by NOEL Philippe's avatar NOEL Philippe

implement all methods for Structure

parent bb26b649
......@@ -10,10 +10,12 @@ extern crate lazy_static;
mod pdb;
pub use self::pdb::read_pdb::parse_pdb;
pub use self::pdb::write_pdb::write_pdb;
pub use self::pdb::read_pdb::read_pdb;
pub use self::pdb::tools::*;
pub use self::pdb::write_pdb::write_pdb;
pub use self::pdb::atom::*;
pub use self::pdb::chain::Chain;
pub use self::pdb::protein::Protein;
pub use self::pdb::residue::Residue;
pub use self::pdb::structure::Structure;
\ No newline at end of file
......@@ -67,7 +67,7 @@ impl Atom {
}
/// Enumerate to check the types of the parsed atom.
///
///
/// Atoms are classified according to the following map:
/// "ARG" => AtomTypes::AminoAcid
/// "LYS" => AtomTypes::AminoAcid
......@@ -111,7 +111,7 @@ pub enum AtomTypes {
impl AtomTypes {
/// Return an `AtomTypes` according to the "residue" name of the atom
///
///
/// ````
/// use pdbparser::AtomTypes;
///
......
pub mod atom;
pub mod chain;
pub mod protein;
pub mod structure;
pub mod read_pdb;
pub mod residue;
pub mod write_pdb;
pub mod tools;
pub mod write_pdb;
mod selection_atom;
......@@ -3,8 +3,10 @@ use std::io::prelude::*;
use std::io::BufReader;
use std::process;
use super::protein::Protein;
use super::atom::AtomTypes;
use super::protein::Protein;
use super::structure::Structure;
/// Parse the string and return an `f32`. `trim` is used to remove
/// `\n` and spaces.
......@@ -43,6 +45,64 @@ fn parse_int(s: &str) -> i64 {
}
}
/// Parse the pdb file and return a `Structure`
///
/// Only `ATOM` and `HETATM` records whose residue name is classified as
/// `AtomTypes::AminoAcid` are added to the `Structure`; every other record
/// is ignored. A record too short to hold the fixed-width columns up to the
/// z coordinate is reported on stderr and skipped.
///
/// * `pdb` - path of the pdb file to read
/// * `name` - name given to the returned `Structure`
///
/// The process exits with status 1 if the file cannot be opened or read.
///
/// # Examples
/// ```
/// use pdbparser;
/// let my_struct = pdbparser::read_pdb("tests/tests_file/f2.pdb", "f2");
/// assert_eq!(66, my_struct.get_residue_number());
/// ```
pub fn read_pdb(pdb: &str, name: &str) -> Structure {
    // Check if the file exists and/or can be read
    let file = match File::open(pdb) {
        Ok(f) => f,
        Err(e) => {
            eprintln!("Could not open the file \"{}\"\nError: {}", pdb, e);
            process::exit(1);
        }
    };

    let reader = BufReader::new(file);
    let mut structure = Structure::new(name.to_string());

    for line in reader.lines() {
        // A read failure is handled like an open failure instead of panicking.
        let l = match line {
            Ok(l) => l,
            Err(e) => {
                eprintln!("Could not read the file \"{}\"\nError: {}", pdb, e);
                process::exit(1);
            }
        };

        if !(l.starts_with("HETATM") || l.starts_with("ATOM")) {
            continue;
        }

        // Fixed-width columns of the record. `get` returns `None` instead of
        // panicking when the line is too short or a bound is not on a char boundary.
        let fields = (
            l.get(6..11),
            l.get(12..17),
            l.get(17..20),
            l.get(21..22).and_then(|s| s.chars().next()),
            l.get(22..26),
            l.get(30..38),
            l.get(38..46),
            l.get(46..54),
        );
        let (serial, atom_name, residue_name, chain, res_seq, x, y, z) = match fields {
            (Some(a), Some(b), Some(c), Some(d), Some(e), Some(f), Some(g), Some(h)) => {
                (a, b, c, d, e, f, g, h)
            }
            _ => {
                eprintln!("Skipping malformed ATOM/HETATM record: \"{}\"", l);
                continue;
            }
        };

        // First get the resname.
        // If the "residue" is an amino acid, parse the rest of the line and add the
        // information to the structure, else continue to the next line.
        let residue_name = residue_name.trim();
        match AtomTypes::get(residue_name.to_uppercase().as_str()) {
            Some(AtomTypes::AminoAcid) => {
                let atom_number = parse_int(serial);
                let residue_number = parse_int(res_seq);
                let x = parse_float(x);
                let y = parse_float(y);
                let z = parse_float(z);
                // Add information to the Structure.
                // NOTE(review): `as u64` wraps negative numbers; kept because
                // `update_structure` takes `u64`.
                structure.update_structure(
                    chain,
                    residue_name.to_string(),
                    residue_number as u64,
                    atom_name.trim().to_string(),
                    atom_number as u64,
                    [x, y, z],
                );
            }
            // Water, lipid, nucleic acid and unrecognised residues are ignored for now.
            _ => (), //TODO: Warning with unrecognize atom
        }
    }
    structure
}
/// Parse the pdb file and return a protein structure
///
/// # Examples
......@@ -52,7 +112,6 @@ fn parse_int(s: &str) -> i64 {
/// assert_eq!(66, my_prot.get_number_residue());
/// ```
pub fn parse_pdb(pdb: &str, name: &str) -> Protein {
// Check if the file exist and/or can be read
let pdb = match File::open(pdb) {
Ok(f) => f,
......@@ -63,6 +122,7 @@ pub fn parse_pdb(pdb: &str, name: &str) -> Protein {
};
let reader = BufReader::new(pdb);
let mut structure = Structure::new(String::from("toto"));
let mut protein = Protein::new(name.to_string());
for line in reader.lines() {
......@@ -94,10 +154,9 @@ pub fn parse_pdb(pdb: &str, name: &str) -> Protein {
Some(AtomTypes::Water) => (),
Some(AtomTypes::Lipid) => (),
Some(AtomTypes::NucleicAcid) => (),
_ => () //TODO: Warning with unrecognize atom
_ => (), //TODO: Warning with unrecognize atom
}
}
}
protein
}
use super::chain::Chain;
use super::residue::Residue;
use super::atom::Atom;
/// A `Structure` is the SUPER structure which contains molecules like Protein, DNA, etc.
///
/// It stores a name and the list of `Chain` it is made of.
pub struct Structure {
    /// Name of the structure, as passed to `Structure::new`.
    pub name: String,
    /// Chains of the structure; `add_chain` appends to this vector.
    pub chains: Vec<Chain>,
    /// Name of the chain most recently passed to `add_chain`
    /// (`Structure::new` initialises it to `' '`).
    last_chain_added: char,
}
impl Structure {
    /// Build an empty `Structure` named `n`.
    ///
    /// The returned value contains no `Chain`.
    ///
    /// # Examples
    ///
    /// ````
    /// use pdbparser;
    ///
    /// let my_prot = pdbparser::Structure::new(String::from("my_struct"));
    ///
    /// ````
    pub fn new(n: String) -> Structure {
        let chains = Vec::new();
        Structure {
            name: n,
            chains,
            last_chain_added: ' ',
        }
    }

    /// Borrow the name of the `Structure`.
    ///
    /// # Examples
    ///
    /// ````
    /// use pdbparser;
    ///
    /// let my_struct = pdbparser::Structure::new(String::from("my_struct"));
    ///
    /// assert_eq!("my_struct", my_struct.name());
    ///
    /// ````
    pub fn name(&self) -> &str {
        self.name.as_str()
    }

    /// Append the `Chain` `c` to the `Structure` and remember its name as the
    /// last chain added.
    ///
    /// # Examples
    ///
    /// ````
    /// use pdbparser;
    ///
    /// let mut my_struct = pdbparser::Structure::new(String::from("my_struct"));
    /// let my_chain_a = pdbparser::Chain::new('a');
    ///
    /// my_struct.add_chain(my_chain_a);
    ///
    /// assert_eq!(1, my_struct.get_chain_number());
    ///
    /// ````
    pub fn add_chain(&mut self, c: Chain) {
        // Read the name before `c` is moved into the vector.
        let label = c.get_name();
        self.chains.push(c);
        self.last_chain_added = label;
    }

    /// Tell whether a chain named `c` is stored in the `Structure`.
    ///
    /// # Examples
    ///
    /// ````
    /// use pdbparser;
    ///
    /// let mut my_struct = pdbparser::Structure::new(String::from("my_struct"));
    /// let my_chain_a = pdbparser::Chain::new('a');
    /// my_struct.add_chain(my_chain_a);
    ///
    /// assert!(my_struct.is_chain('a'));
    ///
    /// ````
    pub fn is_chain(&self, c: char) -> bool {
        self.chains.iter().any(|chain| chain.get_name() == c)
    }

    /// Count the `Chain` stored in the `Structure`.
    ///
    /// # Examples
    ///
    /// ````
    /// use pdbparser;
    ///
    /// let my_struct = pdbparser::Structure::new(String::from("my_struct"));
    ///
    /// assert_eq!(0, my_struct.get_chain_number());
    /// ````
    pub fn get_chain_number(&self) -> usize {
        self.chains.len()
    }

    /// Count the `Residue` stored in all the chains of the `Structure`.
    ///
    /// According to PDB format, residue can be molecules of amino acid, nucleic acid, lipid or ligand
    ///
    /// # Examples
    ///
    /// ````
    /// use pdbparser;
    ///
    /// let mut my_struct = pdbparser::Structure::new(String::from("my_struct"));
    /// let mut my_chain = pdbparser::Chain::new('n');
    /// let lys = pdbparser::Residue::new(String::from("lysine"), 1);
    /// let pro = pdbparser::Residue::new(String::from("proline"), 2);
    ///
    /// my_chain.add_res(lys);
    /// my_chain.add_res(pro);
    /// my_struct.add_chain(my_chain);
    ///
    /// assert_eq!(2, my_struct.get_residue_number());
    ///
    /// ````
    pub fn get_residue_number(&self) -> u64 {
        self.chains
            .iter()
            .flat_map(|chain| chain.lst_res.iter())
            .count() as u64
    }

    /// Collect the number of every atom, walking chains, then residues, then atoms.
    /// Can be used in other program like rrmsd_map to select specific atoms
    ///
    /// # Examples
    /// ```
    /// use pdbparser;
    ///
    /// let my_struct = pdbparser::read_pdb("tests/tests_file/f2_adn.pdb", "f2");
    /// let atom_index = my_struct.get_atom_index();
    ///
    /// assert_eq!(atom_index[0], 1);
    /// assert_eq!(atom_index[1], 2);
    /// ```
    pub fn get_atom_index(&self) -> Vec<u64> {
        self.chains
            .iter()
            .flat_map(|chain| &chain.lst_res)
            .flat_map(|res| &res.lst_atom)
            .map(|atom| atom.number)
            .collect()
    }

    /// Look up a chain by name and hand back a mutable reference to it.
    /// `None` is returned when no chain carries that name.
    ///
    /// # Examples
    ///
    /// ````
    /// use pdbparser;
    ///
    /// let mut my_struct = pdbparser::Structure::new(String::from("my_struct"));
    /// my_struct.add_chain(pdbparser::Chain::new('n'));
    /// assert_eq!('n', my_struct.chains[0].get_name());
    /// {
    ///     let mut reference = my_struct.get_chain_ref('n').unwrap();
    ///     reference.name = 'a';
    /// }
    /// assert_eq!('a', my_struct.chains[0].get_name());
    /// ````
    pub fn get_chain_ref(&mut self, c: char) -> Option<&mut Chain> {
        // The first chain with a matching name wins.
        self.chains.iter_mut().find(|chain| chain.name == c)
    }

    /// Record one atom in the `Structure` (used in the parsing).
    ///
    /// The chain named `chain` and the residue numbered `res_number` are
    /// created when they are missing, then the atom is added to that residue.
    ///
    /// TODO (original author's note): change this to a macro.
    pub fn update_structure(
        &mut self,
        chain: char,
        res_name: String,
        res_number: u64,
        atom_name: String,
        atom_number: u64,
        coord: [f32; 3],
    ) {
        // Reuse the chain when it already exists, otherwise create it first
        // and fetch the freshly added one.
        let chain_entry = match self.get_chain_ref(chain) {
            Some(existing) => existing,
            None => {
                self.add_chain(Chain::new(chain));
                self.get_chain_ref(chain).unwrap()
            }
        };

        // Same strategy for the residue inside that chain.
        let residue_entry = match chain_entry.get_residue_ref(res_number) {
            Some(existing) => existing,
            None => {
                chain_entry.add_res(Residue::new(res_name, res_number));
                chain_entry.get_residue_ref(res_number).unwrap()
            }
        };

        residue_entry.add_atom(Atom::new(atom_name, atom_number, coord));
    }
}
\ No newline at end of file
......@@ -55,4 +55,4 @@ pub fn fasta_seq(my_prot: Protein) -> String {
}
}
fasta
}
\ No newline at end of file
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment