Commit 56ad5f66 authored by NOEL Philippe

Add Enumeration for AtomTypes while parsing PDB file

parent 3767c9fc
...@@ -2,8 +2,10 @@ use std::fs::File; ...@@ -2,8 +2,10 @@ use std::fs::File;
use std::io::prelude::*; use std::io::prelude::*;
use std::io::BufReader; use std::io::BufReader;
use std::process; use std::process;
use std::collections::HashMap;
use super::protein::Protein; use super::protein::Protein;
use super::atom::AtomTypes;
/// Parse the string to return an f32. The `trim` is used to remove /// Parse the string to return an f32. The `trim` is used to remove
/// \n and spaces. /// \n and spaces.
...@@ -52,10 +54,42 @@ fn parse_int(s: &str) -> i64 { ...@@ -52,10 +54,42 @@ fn parse_int(s: &str) -> i64 {
/// ``` /// ```
pub fn parse_pdb(pdb: &str, name: &str) -> Protein { pub fn parse_pdb(pdb: &str, name: &str) -> Protein {
// Allocate here to avoid multiple allocation for every call // Allocate here to avoid multiple allocation for every call
let lst_res = vec![ const atom_type: HashMap<&str, AtomTypes> = [
"ARG", "HIS", "HSE", "HSD", "LYS", "LYS", "ASP", "GLU", "SER", "THR", "ASN", "GLN", "CYS", ("ARG", AtomTypes::AminoAcid),
"SEC", "GLY", "PRO", "ALA", "VAL", "ILE", "LEU", "MET", "PHE", "TYR", "TRP", ("LYS", AtomTypes::AminoAcid),
]; ("ASN", AtomTypes::AminoAcid),
("ASP", AtomTypes::AminoAcid),
("GLU", AtomTypes::AminoAcid),
("SER", AtomTypes::AminoAcid),
("THR", AtomTypes::AminoAcid),
("GLN", AtomTypes::AminoAcid),
("CYS", AtomTypes::AminoAcid),
("HIS", AtomTypes::AminoAcid),
("HSD", AtomTypes::AminoAcid),
("HSP", AtomTypes::AminoAcid),
("HSD", AtomTypes::AminoAcid),
("SEC", AtomTypes::AminoAcid),
("GLY", AtomTypes::AminoAcid),
("PRO", AtomTypes::AminoAcid),
("ALA", AtomTypes::AminoAcid),
("VAL", AtomTypes::AminoAcid),
("ILE", AtomTypes::AminoAcid),
("LEU", AtomTypes::AminoAcid),
("MET", AtomTypes::AminoAcid),
("PHE", AtomTypes::AminoAcid),
("TYR", AtomTypes::AminoAcid),
("TRP", AtomTypes::AminoAcid),
("ADE", AtomTypes::NucleicAcid),
("GUA", AtomTypes::NucleicAcid),
("THY", AtomTypes::NucleicAcid),
("CYT", AtomTypes::NucleicAcid),
("TIP3W", AtomTypes::Water),
("POPC", AtomTypes::Lipid),
("POPE", AtomTypes::Lipid),
]
.iter()
.cloned()
.collect();
// Check if the file exist and/or can be read // Check if the file exist and/or can be read
let pdb = match File::open(pdb) { let pdb = match File::open(pdb) {
...@@ -76,24 +110,30 @@ pub fn parse_pdb(pdb: &str, name: &str) -> Protein { ...@@ -76,24 +110,30 @@ pub fn parse_pdb(pdb: &str, name: &str) -> Protein {
// If the "residue" is a protein residue, continue parsing the line and add the information to the protein // If the "residue" is a protein residue, continue parsing the line and add the information to the protein
// else continue to the next line // else continue to the next line
let residue_name = &l[17..20].trim(); let residue_name = &l[17..20].trim();
if is_protein_res(residue_name, &lst_res) { match atom_type.get(&residue_name.to_uppercase()[..]) {
let atom_name = &l[12..17].trim().to_string(); Some(AtomTypes::AminoAcid) => {
let chain = l[21..22].chars().next().unwrap(); let atom_name = &l[12..17].trim().to_string();
let atom_number = parse_int(&l[6..11].to_string()); let chain = l[21..22].chars().next().unwrap();
let residue_number = parse_int(&l[22..26].to_string()); let atom_number = parse_int(&l[6..11].to_string());
let x = parse_float(&l[30..38].to_string()); let residue_number = parse_int(&l[22..26].to_string());
let y = parse_float(&l[38..46].to_string()); let x = parse_float(&l[30..38].to_string());
let z = parse_float(&l[46..54].to_string()); let y = parse_float(&l[38..46].to_string());
// Add informations to the protein let z = parse_float(&l[46..54].to_string());
protein.update_protein( // Add informations to the protein
chain, protein.update_protein(
residue_name.to_string(), chain,
residue_number as u64, residue_name.to_string(),
atom_name.clone(), residue_number as u64,
atom_number as u64, atom_name.clone(),
[x, y, z], atom_number as u64,
); [x, y, z],
} );
}
Some(AtomTypes::Water) => (),
Some(AtomTypes::Lipid) => (),
Some(AtomTypes::NucleicAcid) => (),
_ => () //TODO: Warning with unrecognize atom
}
} }
} }
protein protein
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment