Commit fc840f90 authored by Emmanuel Bresso

structure version

parent 447aa92b
Pipeline #75130 passed with stages
in 2 minutes and 24 seconds
[package]
name = "pdbparser"
version = "1.0.0"
authors = ["NOEL Philippe <philippe.noel@inria.fr>"]
authors = ["NOEL Philippe <philippe.noel@inria.fr>", "BRESSO Emmanuel <emmanuel.bresso@loria.fr>"]
edition = "2018"
include = ["Cargo.toml", "src/**/*.rs", "tests/**/*.rs", "README.md"]
......@@ -15,4 +15,4 @@ path = "src/lib.rs"
[dependencies]
lazy_static="1.3.0"
//! # pdbparser
//!
//! `pdbparser` is a library to manipulate protein structure. It can parse, and filter PDB files.
//! You can create a protein structure by parsing with parse_pdb function. Then you can add filters on your protein.
//! You can create a [`Structure`] by parsing a PDB file with the `read_pdb` function, then add filters on your Structure
//! and save it with the `write_pdb` function.
//!
#[macro_use]
extern crate lazy_static;
mod pdb;
pub use self::pdb::read_pdb::parse_pdb;
pub use self::pdb::write_pdb::write_pdb;
pub use self::pdb::read_pdb::read_pdb;
pub use self::pdb::tools::*;
pub use self::pdb::write_pdb::write_pdb;
pub use self::pdb::atom::Atom;
pub use self::pdb::chain::Chain;
pub use self::pdb::protein::Protein;
pub use self::pdb::atom::*;
pub use self::pdb::chain::{Chain, ChainTypes};
pub use self::pdb::residue::Residue;
pub use self::pdb::structure::Structure;
......@@ -3,15 +3,16 @@ extern crate pdbparser;
use pdbparser::*;
fn main() {
let my_prot = parse_pdb("tests/tests_file/f2.pdb", "5jpq");
match write_pdb(&my_prot, "toto.pdb") {
Ok(_) => (),
Err(e) => println!("Error : {}", e),
};
//let my_prot = read_pdb("tests/tests_file/f2.pdb", "5jpq");
//match write_pdb(&my_prot, "toto.pdb") {
// Ok(_) => (),
// Err(e) => println!("Error : {}", e),
//};
use pdbparser;
let my_prot = pdbparser::parse_pdb("tests/tests_file/f2.pdb", "f2");
let my_prot = pdbparser::read_pdb("tests/tests_file/trp_MD.pdb", "5JPQ");
/*
let mut backbone = my_prot.select_atoms("backbone").unwrap();
let lst_atom_id = backbone.get_atom_index();
assert_eq!(1, lst_atom_id[0]);
......@@ -26,4 +27,5 @@ fn main() {
// let chain_a = my_prot.select_atoms("chain a").unwrap();
// println!("Prot : {} \nn chain: {}\nn res: {}\nn atom: {}", chain_a.name, chain_a.get_number_chain(), chain_a.get_number_residue(), chain_a.get_number_atom());
*/
}
use std::ops::Deref;
/// An `Atom` is a sub-structure linked to a `Residue`.
/// It stores the following properties:
/// An [`Atom`] is a sub-structure linked to a [`Residue`].
/// It stores the following properties
/// - atom name;
/// - atom number (atomid);
/// - Coordinates x, y and z
......@@ -16,7 +16,7 @@ pub struct Atom {
}
impl Atom {
/// Create a new structure Atom. An atom have a name, a number and x, y, z coordinates
/// Create a new [`Atom`] structure. An atom has a name, a number and x, y, z coordinates.
/// If the atom name is "C", "CA", "N", "O", "OT1" or "OT2", it will be considered as backbone
///
/// # Examples
......@@ -38,13 +38,13 @@ impl Atom {
}
}
/// Get the name of the atom
/// Get the name of the [`Atom`]
///
pub fn name(&self) -> String {
self.name.clone()
}
/// Compute the distance between 2 Atoms
/// Compute the distance between 2 [`Atom`]
///
/// # Examples
///
......@@ -64,3 +64,4 @@ impl Atom {
.sqrt()
}
}
use super::residue::Residue;
use std::collections::HashMap;
/// A `Chain` is a sub-structure linked to a `Protein`.
/// It contain one or more `Residue` and a name
/// A [`Chain`] is a sub-structure linked to a [`Structure`].
/// It contains one or more [`Residue`] and a name
///
#[derive(Debug)]
pub struct Chain {
/// Single-character name of the chain (e.g. 'a')
pub name: char,
/// Residues held by the chain; empty right after `Chain::new`
pub lst_res: Vec<Residue>,
/// Kind of molecule assigned to the chain when it is created
pub chain_type: ChainTypes,
}
impl Chain {
/// Create a new chain structure with an empty list of residue
/// Create a new [`Chain`] structure with an empty list of residue
///
/// # Examples
///
/// ````
/// use pdbparser;
///
/// let my_chain = pdbparser::Chain::new('a');
/// let my_chain = pdbparser::Chain::new('a', pdbparser::ChainTypes::Protein);
///
/// ````
pub fn new(name: char) -> Chain {
pub fn new(name: char, t: ChainTypes) -> Chain {
    // A freshly created chain owns no residue yet; they are appended later
    // through `add_res`.
    let lst_res = Vec::new();
    let chain_type = t;
    Chain {
        name,
        lst_res,
        chain_type,
    }
}
/// Add a new structure residue to the Chain
/// Add a new structure [`Residue`] to the [`Chain`]
///
/// # Examples
///
/// ````
/// use pdbparser;
///
/// let mut my_chain = pdbparser::Chain::new('a');
/// let mut my_chain = pdbparser::Chain::new('a', pdbparser::ChainTypes::Protein);
/// let lys = pdbparser::Residue::new(String::from("lysine"), 1);
///
/// my_chain.add_res(lys);
......@@ -46,14 +49,14 @@ impl Chain {
self.lst_res.push(r);
}
/// Get the number of residue in the Chain
/// Get the number of [`Residue`] in the [`Chain`]
///
/// # Examples
///
/// ````
/// use pdbparser;
///
/// let my_chain = pdbparser::Chain::new('a');
/// let my_chain = pdbparser::Chain::new('a', pdbparser::ChainTypes::Protein);
///
/// assert_eq!(0, my_chain.get_number_residue());
///
......@@ -62,7 +65,7 @@ impl Chain {
self.lst_res.len() as u64
}
/// Return a mutable reference of a residue with its name. Return None if the
/// Return a mutable reference of a [`Residue`] with its name. Return None if the
/// residue does not exist
///
/// # Examples
......@@ -70,7 +73,7 @@ impl Chain {
/// ````
/// use pdbparser;
///
/// let mut my_chain = pdbparser::Chain::new('a');
/// let mut my_chain = pdbparser::Chain::new('a', pdbparser::ChainTypes::Protein);
/// let lys = pdbparser::Residue::new(String::from("lysine"), 1);
/// my_chain.add_res(lys);
///
......@@ -96,3 +99,107 @@ impl Chain {
self.name
}
}
/// Kind of molecule a residue name is associated with.
///
/// [`ChainTypes::get`] looks a residue name up (after upper-casing it) in the
/// private `ATOM_TYPES` table, which is the authoritative mapping:
///
/// - `ChainTypes::Protein`: "ARG", "LYS", "ASN", "ASP", "GLU", "SER", "THR",
///   "GLN", "CYS", "HIS", "HSD", "HSP", "SEC", "GLY", "PRO", "ALA", "VAL",
///   "ILE", "LEU", "MET", "PHE", "TYR", "TRP"
/// - `ChainTypes::NucleicAcid`: "ADE", "GUA", "THY", "CYT"
/// - `ChainTypes::Lipid`: "POPC", "POPE"
/// - `ChainTypes::Water`: "TIP3W", "HOH"
///
/// Any name that is not in the table maps to `ChainTypes::Unknown`.
///
#[derive(PartialEq, Debug, Copy, Clone)]
pub enum ChainTypes {
/// Residue name registered as an amino acid
Protein,
/// Residue name registered as a nucleotide
NucleicAcid,
/// Residue name registered as a lipid
Lipid,
/// Residue name registered as water
Water,
/// Fallback returned by `get` when the name is not in the table
Unknown,
}
impl ChainTypes {
    /// Look up the [`ChainTypes`] registered for the "residue" name of an atom.
    ///
    /// The name is converted to upper case before the lookup, so `"trp"` and
    /// `"TRP"` give the same answer. A name that is not registered yields
    /// [`ChainTypes::Unknown`].
    ///
    /// ````
    /// use pdbparser::ChainTypes;
    ///
    /// assert_eq!(ChainTypes::Protein, ChainTypes::get("trp"));
    /// assert_eq!(ChainTypes::Lipid, ChainTypes::get("POPC"));
    /// assert_eq!(ChainTypes::Unknown, ChainTypes::get("toto"));
    ///
    /// ````
    pub fn get(atom: &str) -> ChainTypes {
        // The table keys are upper case; normalise the query the same way.
        let key = atom.to_uppercase();
        ATOM_TYPES
            .get(key.as_str())
            .map_or(ChainTypes::Unknown, |kind| *kind)
    }
}
lazy_static! {
    // Lookup table from an upper-case residue name to the kind of molecule it
    // belongs to. `lazy_static!` builds the map once, on first access, and
    // `ChainTypes::get` is its only reader in this block of the file.
    static ref ATOM_TYPES: HashMap<&'static str, ChainTypes> = [
        // Amino acids
        ("ARG", ChainTypes::Protein),
        ("LYS", ChainTypes::Protein),
        ("ASN", ChainTypes::Protein),
        ("ASP", ChainTypes::Protein),
        ("GLU", ChainTypes::Protein),
        ("SER", ChainTypes::Protein),
        ("THR", ChainTypes::Protein),
        ("GLN", ChainTypes::Protein),
        ("CYS", ChainTypes::Protein),
        // Histidine and its alternative residue names. Each name must appear
        // exactly once: a duplicated key silently hides a missing one.
        ("HIS", ChainTypes::Protein),
        ("HSD", ChainTypes::Protein),
        ("HSP", ChainTypes::Protein),
        ("HSE", ChainTypes::Protein),
        ("SEC", ChainTypes::Protein),
        ("GLY", ChainTypes::Protein),
        ("PRO", ChainTypes::Protein),
        ("ALA", ChainTypes::Protein),
        ("VAL", ChainTypes::Protein),
        ("ILE", ChainTypes::Protein),
        ("LEU", ChainTypes::Protein),
        ("MET", ChainTypes::Protein),
        ("PHE", ChainTypes::Protein),
        ("TYR", ChainTypes::Protein),
        ("TRP", ChainTypes::Protein),
        // Nucleotides
        ("ADE", ChainTypes::NucleicAcid),
        ("GUA", ChainTypes::NucleicAcid),
        ("THY", ChainTypes::NucleicAcid),
        ("CYT", ChainTypes::NucleicAcid),
        // Water
        ("TIP3W", ChainTypes::Water),
        ("HOH", ChainTypes::Water),
        // Lipids
        ("POPC", ChainTypes::Lipid),
        ("POPE", ChainTypes::Lipid),
    ]
    .iter()
    .cloned()
    .collect();
}
pub mod atom;
pub mod chain;
pub mod protein;
pub mod read_pdb;
pub mod residue;
pub mod write_pdb;
pub mod structure;
pub mod tools;
pub mod write_pdb;
mod selection_atom;
......@@ -3,7 +3,9 @@ use std::io::prelude::*;
use std::io::BufReader;
use std::process;
use super::protein::Protein;
use super::chain::ChainTypes;
use super::structure::Structure;
/// Parse the string to return a f32. The `trim` is used to remove
/// /n and spaces.
......@@ -42,71 +44,51 @@ fn parse_int(s: &str) -> i64 {
}
}
/// Parse the pdb file and return a protein structure
/// Parse the pdb file and return a [`Structure`]
///
/// # Examples
/// ```
/// use pdbparser;
/// let my_prot = pdbparser::parse_pdb("tests/tests_file/f2.pdb", "f2");
/// assert_eq!(66, my_prot.get_number_residue());
/// let my_struct = pdbparser::read_pdb("tests/tests_file/f2.pdb", "f2");
/// assert_eq!(66, my_struct.get_residue_number());
/// ```
pub fn parse_pdb(pdb: &str, name: &str) -> Protein {
// Allocate here to avoid multiple allocation for every call
let lst_res = vec![
"ARG", "HIS", "HSE", "HSD", "LYS", "LYS", "ASP", "GLU", "SER", "THR", "ASN", "GLN", "CYS",
"SEC", "GLY", "PRO", "ALA", "VAL", "ILE", "LEU", "MET", "PHE", "TYR", "TRP",
];
pub fn read_pdb(pdb: &str, name: &str) -> Structure {
// Check if the file exist and/or can be read
let pdb = match File::open(pdb) {
Ok(f) => f,
Err(e) => {
println!("Could not open the file \"{}\"\nError: {}", pdb, e);
eprintln!("Could not open the file \"{}\"\nError: {}", pdb, e);
process::exit(1);
}
};
let reader = BufReader::new(pdb);
let mut protein = Protein::new(name.to_string());
let mut structure = Structure::new(name.to_string());
for line in reader.lines() {
let l = line.unwrap();
if l.starts_with("HETAM") || l.starts_with("ATOM") {
// First get the resname.
// If the "residue" is a protein residue, continue to parse the line and add informations to the protein
// If the "residue" is a amino acid, continue to parse the line and add informations to the protein
// else continue to the next one line
let residue_name = &l[17..20].trim();
if is_protein_res(residue_name, &lst_res) {
let atom_name = &l[12..17].trim().to_string();
let chain = l[21..22].chars().next().unwrap();
let atom_number = parse_int(&l[6..11].to_string());
let residue_number = parse_int(&l[22..26].to_string());
let x = parse_float(&l[30..38].to_string());
let y = parse_float(&l[38..46].to_string());
let z = parse_float(&l[46..54].to_string());
// Add informations to the protein
protein.update_protein(
chain,
residue_name.to_string(),
residue_number as u64,
atom_name.clone(),
atom_number as u64,
[x, y, z],
);
}
}
}
protein
}
/// Test if the selected line is a residue
///
fn is_protein_res(r: &str, lst: &[&str]) -> bool {
let r = r.to_uppercase();
for res in lst {
if r == *res {
return true;
let atom_name = &l[12..17].trim().to_string();
let chain = l[21..22].chars().next().unwrap();
let atom_number = parse_int(&l[6..11].to_string());
let residue_number = parse_int(&l[22..26].to_string());
let x = parse_float(&l[30..38].to_string());
let y = parse_float(&l[38..46].to_string());
let z = parse_float(&l[46..54].to_string());
// Add informations to the Structure
structure.update_structure(
chain,
residue_name.to_string(),
residue_number as u64,
atom_name.clone(),
atom_number as u64,
[x, y, z],
);
}
}
false
structure
}
use super::atom::Atom;
/// A `Residue` is a sub-structure linked to a `Chain`.
/// It contain one or more `Atom`
/// A [`Residue`] is a sub-structure linked to a [`Chain`].
/// It contain one or more [`Atom`]
/// It stores the following properties:
/// - res name;
/// - res number (resid);
......@@ -15,7 +15,7 @@ pub struct Residue {
}
impl Residue {
/// Create a new Residue structure with an empty list of atom.
/// Create a new [`Residue`] structure with an empty list of atom.
/// The Residue have a name and a number
///
/// # Examples
......@@ -40,13 +40,13 @@ impl Residue {
self.name.clone()
}
/// Get the residue ID of the residue
/// Get the residue ID of the [`Residue`]
///
pub fn get_res_num(&self) -> u64 {
self.res_num
}
/// Get the number of Atom in the Residue
/// Get the number of [`Atom`] in the [`Residue`]
///
/// # Examples
///
......@@ -61,7 +61,7 @@ impl Residue {
self.lst_atom.len() as u64
}
/// Add an Atom structure to the Residue
/// Add an [`Atom`] structure to the [`Residue`]
///
/// # Examples
///
......
......@@ -99,7 +99,7 @@ pub fn parse_select(pattern: &str) -> Option<Vec<Select>> {
Some(lst_selection)
}
pub fn atom_match(sel: &Vec<Select>, chain: char, res_id: u64, is_back: bool) -> bool {
pub fn atom_match(sel: &[Select], chain: char, res_id: u64, is_back: bool) -> bool {
// Each pattern in sel is compared to the characteristics of the atom.
// If at any moment the characteristics do not match, the function returns false.
// In the end, it returns true (everything is considered ok)
......
This diff is collapsed.
......@@ -6,10 +6,10 @@ use std::io::BufReader;
use std::io::prelude::BufRead;
use std::process;
use super::protein::Protein;
use super::structure::Structure;
use super::atom::Atom;
/// Convert the protein to a FASTA sequence (1 residue as 1 char)
/// Convert all the amino acids to a FASTA sequence (1 residue as 1 char)
/// Consult the corresponding table to have the code 1 letter <-> 3 letters
/// [Wikipedia amino acid](https://en.wikipedia.org/wiki/Amino_acid)
///
......@@ -17,10 +17,10 @@ use super::atom::Atom;
/// ```
/// use pdbparser;
///
/// let my_prot = pdbparser::parse_pdb("tests/tests_file/f2.pdb", "f2");
/// assert_eq!("TSPQPYSIERTIRWLTYQVANSLALVSEADKIMQTEYMKMIQNSGEITDRGEAILRLLKTNKHYEH", pdbparser::fasta_seq(my_prot));
/// let my_struct = pdbparser::read_pdb("tests/tests_file/f2.pdb", "f2");
/// assert_eq!("TSPQPYSIERTIRWLTYQVANSLALVSEADKIMQTEYMKMIQNSGEITDRGEAILRLLKTNKHYEH", pdbparser::fasta_seq(&my_struct));
/// ```
pub fn fasta_seq(my_prot: Protein) -> String {
pub fn fasta_seq(my_struct: &Structure) -> String {
let res: HashMap<&str, char> = [
("ARG", 'R'),
("LYS", 'K'),
......@@ -51,14 +51,14 @@ pub fn fasta_seq(my_prot: Protein) -> String {
.cloned()
.collect();
let mut fasta = String::with_capacity(my_prot.get_number_residue() as usize);
//TODO: Change the with_capacity(my_struct.get_residue_number()) because get all residue (dna, lipid, etc..)
let mut fasta = String::with_capacity(my_struct.get_residue_number() as usize);
for chain in &my_prot.lst_chain {
for chain in &my_struct.chains {
for residue in &chain.lst_res {
match res.get(&residue.name()[..]) {
Some(r) => fasta.push(*r),
None => (),
};
if let Some(r) = res.get(&residue.name()[..]) {
fasta.push(*r);
}
}
}
fasta
......@@ -124,19 +124,18 @@ fn atom_mass() -> HashMap<String, f32> {
/// ```
///use pdbparser;
/// let my_prot = pdbparser::parse_pdb("tests/tests_file/5jpq.pdb", "5jqp");
/// let my_prot = pdbparser::read_pdb("tests/tests_file/5jpq.pdb", "5jqp");
/// let atom_number=my_prot.get_number_atom() as usize;
/// let atom_number=my_prot.get_atom_number() as usize;
/// let mut atoms: Vec<pdbparser::Atom> = Vec::with_capacity(atom_number);
/// for chain in my_prot.lst_chain {
/// for chain in my_prot.chains {
/// for residue in chain.lst_res {
/// for atom in residue.lst_atom {
/// atoms.push(atom);
/// }
/// }
/// }
/// let tab = pdbparser::center_of_mass(atoms);
/// assert_eq!([235, 233, 262], [tab[0] as u32, tab[1] as u32, tab[2] as u32])
/// assert_eq!([237.98595, 241.93814, 231.15921], pdbparser::center_of_mass(atoms))
/// ```
pub fn center_of_mass(atom_list: Vec<Atom>) -> [f32; 3] {
......
use super::protein::Protein;
use super::structure::Structure;
use std::fs::File;
use std::io;
use std::io::Write;
/// Write a PDB file for the `Protein`.
/// Write a PDB file for the [`Structure`].
/// Be careful, the protein is write with the atom numbers in its structure. Remind to use the method
/// my_protein.refine_dialing() before !
pub fn write_pdb(my_prot: &Protein, file: &str) -> io::Result<()> {
/// my_struct.refine_dialing() before !
pub fn write_pdb(my_prot: &Structure, file: &str) -> io::Result<()> {
let mut output_pdb = File::create(file)?;
for chain in &my_prot.lst_chain {
for chain in &my_prot.chains {
let chain_name = chain.get_name();
for residue in &chain.lst_res {
let res_name = residue.name();
......
......@@ -2,41 +2,41 @@ extern crate pdbparser;
#[test]
fn parse_f2() {
let my_prot = pdbparser::parse_pdb("tests/tests_file/f2.pdb", "f2");
assert_eq!("f2", my_prot.name());
assert_eq!(1, my_prot.get_number_chain());
assert_eq!(66, my_prot.get_number_residue());
assert_eq!(1085, my_prot.get_number_atom());
let my_struct = pdbparser::read_pdb("tests/tests_file/f2.pdb", "f2");
assert_eq!("f2", my_struct.name());
assert_eq!(1, my_struct.get_chain_number());
assert_eq!(66, my_struct.get_residue_number());
assert_eq!(1085, my_struct.get_atom_number());
}
#[test]
fn parse_f2_adn() {
let my_prot = pdbparser::parse_pdb("tests/tests_file/f2_adn.pdb", "f2");
assert_eq!(1, my_prot.get_number_chain());
assert_eq!(66, my_prot.get_number_residue());
assert_eq!(1085, my_prot.get_number_atom());
let my_struct = pdbparser::read_pdb("tests/tests_file/f2_adn.pdb", "f2");
assert_eq!(5, my_struct.get_chain_number());
assert_eq!(8382, my_struct.get_residue_number());
assert_eq!(47740, my_struct.get_atom_number());
}
#[test]
fn parse_trp() {
let my_prot = pdbparser::parse_pdb("tests/tests_file/trp_MD.pdb", "trp");
assert_eq!(1, my_prot.get_number_chain());
assert_eq!(704, my_prot.get_number_residue());
assert_eq!(10688, my_prot.get_number_atom());
let my_struct = pdbparser::read_pdb("tests/tests_file/trp_MD.pdb", "trp");
assert_eq!(3, my_struct.get_chain_number());
assert_eq!(9970, my_struct.get_residue_number());
assert_eq!(396_109, my_struct.get_atom_number());
}
#[test]
fn parse_5jpq() {
let my_prot = pdbparser::parse_pdb("tests/tests_file/5jpq.pdb", "5jpq");
assert_eq!(35, my_prot.get_number_chain());
assert_eq!(8173, my_prot.get_number_residue());
assert_eq!(44801, my_prot.get_number_atom());
let my_struct = pdbparser::read_pdb("tests/tests_file/5jpq.pdb", "5jpq");
assert_eq!(56, my_struct.get_chain_number());
assert_eq!(15066, my_struct.get_residue_number());
assert_eq!(95839, my_struct.get_atom_number());
}
#[test]
fn f2_res() {
let mut my_prot = pdbparser::parse_pdb("tests/tests_file/f2.pdb", "f2");
let chain_a = my_prot.get_chain_ref('A').unwrap();
let mut my_struct = pdbparser::read_pdb("tests/tests_file/f2.pdb", "f2");
let chain_a = my_struct.get_chain_ref('A').unwrap();
let res = chain_a.get_residue_ref(1).unwrap();
assert_eq!("THR", res.name());
......@@ -50,12 +50,13 @@ fn f2_res() {
assert_eq!("HSD", res.name());
}
#[test]
fn mass_center_f2() {
let my_prot = pdbparser::parse_pdb("tests/tests_file/f2.pdb", "f2");
let my_struct = pdbparser::read_pdb("tests/tests_file/f2.pdb", "f2");
let mut atoms: Vec<pdbparser::Atom> = Vec::new();
for chain in my_prot.lst_chain {
for chain in my_struct.chains {
for residue in chain.lst_res {
for atom in residue.lst_atom {
atoms.push(atom);
......@@ -68,15 +69,15 @@ fn mass_center_f2() {
#[test]
fn mass_center_5jpq() {
let my_prot = pdbparser::parse_pdb("tests/tests_file/5jpq.pdb", "5jpq");
let my_struct = pdbparser::read_pdb("tests/tests_file/5jpq.pdb", "5jpq");
let mut atoms: Vec<pdbparser::Atom> = Vec::new();
for chain in my_prot.lst_chain {
for chain in my_struct.chains {
for residue in chain.lst_res {
for atom in residue.lst_atom {
atoms.push(atom);
}
}
}
assert_eq!([235.51479, 233.86612, 262.81265], pdbparser::center_of_mass(atoms))
}
\ No newline at end of file
assert_eq!([237.98595, 241.93814, 231.15921], pdbparser::center_of_mass(atoms))
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment