From 56ad5f66ad25b588447a645d9437d8e3524c93ed Mon Sep 17 00:00:00 2001
From: Philippe Noel
Date: Tue, 9 Apr 2019 15:45:53 +0200
Subject: [PATCH] Add Enumeration for AtomTypes while parsing PDB file

---
Review notes (commentary after the "---" line is not part of the commit message):
 * `atom_type` is bound with `let` instead of `const`: its initializer calls
   `.iter().cloned().collect()`, which is not allowed in a constant item.
 * The duplicated ("HSD", ...) entry is now ("HSE", ...), so the "HSE" residue
   accepted by the removed `lst_res` list is still recognized.
 * The lookup key is the three-byte slice `l[17..20]`, so the "TIP3W", "POPC"
   and "POPE" keys cannot match yet.
 * Only added ("+") lines were edited, so hunk sizes and the diffstat are
   unchanged; the blob id on the `index` line predates these edits.
 * Line breaks and indentation were reconstructed; use a whitespace-tolerant
   apply if the context lines do not match exactly.

 src/pdb/read_pdb.rs | 84 +++++++++++++++++++++++++++++++++------------
 1 file changed, 62 insertions(+), 22 deletions(-)

diff --git a/src/pdb/read_pdb.rs b/src/pdb/read_pdb.rs
index c1a1e86..888441a 100644
--- a/src/pdb/read_pdb.rs
+++ b/src/pdb/read_pdb.rs
@@ -2,8 +2,10 @@ use std::fs::File;
 use std::io::prelude::*;
 use std::io::BufReader;
 use std::process;
+use std::collections::HashMap;
 
 use super::protein::Protein;
+use super::atom::AtomTypes;
 
 /// Parse the string to return a f32. The `trim` is used to remove
 /// /n and spaces.
@@ -52,10 +54,42 @@ fn parse_int(s: &str) -> i64 {
 /// ```
 pub fn parse_pdb(pdb: &str, name: &str) -> Protein {
     // Allocate here to avoid multiple allocation for every call
-    let lst_res = vec![
-        "ARG", "HIS", "HSE", "HSD", "LYS", "LYS", "ASP", "GLU", "SER", "THR", "ASN", "GLN", "CYS",
-        "SEC", "GLY", "PRO", "ALA", "VAL", "ILE", "LEU", "MET", "PHE", "TYR", "TRP",
-    ];
+    let atom_type: HashMap<&str, AtomTypes> = [
+        ("ARG", AtomTypes::AminoAcid),
+        ("LYS", AtomTypes::AminoAcid),
+        ("ASN", AtomTypes::AminoAcid),
+        ("ASP", AtomTypes::AminoAcid),
+        ("GLU", AtomTypes::AminoAcid),
+        ("SER", AtomTypes::AminoAcid),
+        ("THR", AtomTypes::AminoAcid),
+        ("GLN", AtomTypes::AminoAcid),
+        ("CYS", AtomTypes::AminoAcid),
+        ("HIS", AtomTypes::AminoAcid),
+        ("HSD", AtomTypes::AminoAcid),
+        ("HSP", AtomTypes::AminoAcid),
+        ("HSE", AtomTypes::AminoAcid),
+        ("SEC", AtomTypes::AminoAcid),
+        ("GLY", AtomTypes::AminoAcid),
+        ("PRO", AtomTypes::AminoAcid),
+        ("ALA", AtomTypes::AminoAcid),
+        ("VAL", AtomTypes::AminoAcid),
+        ("ILE", AtomTypes::AminoAcid),
+        ("LEU", AtomTypes::AminoAcid),
+        ("MET", AtomTypes::AminoAcid),
+        ("PHE", AtomTypes::AminoAcid),
+        ("TYR", AtomTypes::AminoAcid),
+        ("TRP", AtomTypes::AminoAcid),
+        ("ADE", AtomTypes::NucleicAcid),
+        ("GUA", AtomTypes::NucleicAcid),
+        ("THY", AtomTypes::NucleicAcid),
+        ("CYT", AtomTypes::NucleicAcid),
+        ("TIP3W", AtomTypes::Water),
+        ("POPC", AtomTypes::Lipid),
+        ("POPE", AtomTypes::Lipid),
+    ]
+    .iter()
+    .cloned()
+    .collect();
 
     // Check if the file exist and/or can be read
     let pdb = match File::open(pdb) {
@@ -76,24 +110,30 @@ pub fn parse_pdb(pdb: &str, name: &str) -> Protein {
             // If the "residue" is a protein residue, continue to parse the line and add informations to the protein
             // else continue to the next one line
             let residue_name = &l[17..20].trim();
-            if is_protein_res(residue_name, &lst_res) {
-                let atom_name = &l[12..17].trim().to_string();
-                let chain = l[21..22].chars().next().unwrap();
-                let atom_number = parse_int(&l[6..11].to_string());
-                let residue_number = parse_int(&l[22..26].to_string());
-                let x = parse_float(&l[30..38].to_string());
-                let y = parse_float(&l[38..46].to_string());
-                let z = parse_float(&l[46..54].to_string());
-                // Add informations to the protein
-                protein.update_protein(
-                    chain,
-                    residue_name.to_string(),
-                    residue_number as u64,
-                    atom_name.clone(),
-                    atom_number as u64,
-                    [x, y, z],
-                );
-            }
+            match atom_type.get(&residue_name.to_uppercase()[..]) {
+                Some(AtomTypes::AminoAcid) => {
+                    let atom_name = &l[12..17].trim().to_string();
+                    let chain = l[21..22].chars().next().unwrap();
+                    let atom_number = parse_int(&l[6..11].to_string());
+                    let residue_number = parse_int(&l[22..26].to_string());
+                    let x = parse_float(&l[30..38].to_string());
+                    let y = parse_float(&l[38..46].to_string());
+                    let z = parse_float(&l[46..54].to_string());
+                    // Add information to the protein
+                    protein.update_protein(
+                        chain,
+                        residue_name.to_string(),
+                        residue_number as u64,
+                        atom_name.clone(),
+                        atom_number as u64,
+                        [x, y, z],
+                    );
+                }
+                Some(AtomTypes::Water) => (),
+                Some(AtomTypes::Lipid) => (),
+                Some(AtomTypes::NucleicAcid) => (),
+                _ => (), // TODO: warn about unrecognized residue names
+            }
         }
     }
     protein
-- 
2.22.0