Commit 56ad5f66 authored by NOEL Philippe

Add Enumeration for AtomTypes while parsing PDB file

parent 3767c9fc
......@@ -2,8 +2,10 @@ use std::fs::File;
use std::io::prelude::*;
use std::io::BufReader;
use std::process;
use std::collections::HashMap;
use super::protein::Protein;
use super::atom::AtomTypes;
/// Parse the string and return an f32. The `trim` is used to remove
/// `\n` and spaces.
......@@ -52,10 +54,42 @@ fn parse_int(s: &str) -> i64 {
/// ```
pub fn parse_pdb(pdb: &str, name: &str) -> Protein {
// Allocate here to avoid multiple allocation for every call
let lst_res = vec![
"ARG", "HIS", "HSE", "HSD", "LYS", "LYS", "ASP", "GLU", "SER", "THR", "ASN", "GLN", "CYS",
"SEC", "GLY", "PRO", "ALA", "VAL", "ILE", "LEU", "MET", "PHE", "TYR", "TRP",
];
const atom_type: HashMap<&str, AtomTypes> = [
("ARG", AtomTypes::AminoAcid),
("LYS", AtomTypes::AminoAcid),
("ASN", AtomTypes::AminoAcid),
("ASP", AtomTypes::AminoAcid),
("GLU", AtomTypes::AminoAcid),
("SER", AtomTypes::AminoAcid),
("THR", AtomTypes::AminoAcid),
("GLN", AtomTypes::AminoAcid),
("CYS", AtomTypes::AminoAcid),
("HIS", AtomTypes::AminoAcid),
("HSD", AtomTypes::AminoAcid),
("HSP", AtomTypes::AminoAcid),
("HSD", AtomTypes::AminoAcid),
("SEC", AtomTypes::AminoAcid),
("GLY", AtomTypes::AminoAcid),
("PRO", AtomTypes::AminoAcid),
("ALA", AtomTypes::AminoAcid),
("VAL", AtomTypes::AminoAcid),
("ILE", AtomTypes::AminoAcid),
("LEU", AtomTypes::AminoAcid),
("MET", AtomTypes::AminoAcid),
("PHE", AtomTypes::AminoAcid),
("TYR", AtomTypes::AminoAcid),
("TRP", AtomTypes::AminoAcid),
("ADE", AtomTypes::NucleicAcid),
("GUA", AtomTypes::NucleicAcid),
("THY", AtomTypes::NucleicAcid),
("CYT", AtomTypes::NucleicAcid),
("TIP3W", AtomTypes::Water),
("POPC", AtomTypes::Lipid),
("POPE", AtomTypes::Lipid),
]
.iter()
.cloned()
.collect();
// Check if the file exists and/or can be read
let pdb = match File::open(pdb) {
......@@ -76,24 +110,30 @@ pub fn parse_pdb(pdb: &str, name: &str) -> Protein {
// If the "residue" is a protein residue, continue parsing the line and add its information to the protein;
// otherwise, skip to the next line
let residue_name = &l[17..20].trim();
if is_protein_res(residue_name, &lst_res) {
let atom_name = &l[12..17].trim().to_string();
let chain = l[21..22].chars().next().unwrap();
let atom_number = parse_int(&l[6..11].to_string());
let residue_number = parse_int(&l[22..26].to_string());
let x = parse_float(&l[30..38].to_string());
let y = parse_float(&l[38..46].to_string());
let z = parse_float(&l[46..54].to_string());
// Add the information to the protein
protein.update_protein(
chain,
residue_name.to_string(),
residue_number as u64,
atom_name.clone(),
atom_number as u64,
[x, y, z],
);
}
match atom_type.get(&residue_name.to_uppercase()[..]) {
Some(AtomTypes::AminoAcid) => {
let atom_name = &l[12..17].trim().to_string();
let chain = l[21..22].chars().next().unwrap();
let atom_number = parse_int(&l[6..11].to_string());
let residue_number = parse_int(&l[22..26].to_string());
let x = parse_float(&l[30..38].to_string());
let y = parse_float(&l[38..46].to_string());
let z = parse_float(&l[46..54].to_string());
// Add the information to the protein
protein.update_protein(
chain,
residue_name.to_string(),
residue_number as u64,
atom_name.clone(),
atom_number as u64,
[x, y, z],
);
}
Some(AtomTypes::Water) => (),
Some(AtomTypes::Lipid) => (),
Some(AtomTypes::NucleicAcid) => (),
_ => () //TODO: Warning with unrecognize atom
}
}
}
protein
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment