read_pdb.rs 3.29 KB
Newer Older
NOEL Philippe's avatar
NOEL Philippe committed
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113
use std::fs::File;
use std::io::prelude::*;
use std::io::BufReader;
use std::process;

use super::protein::Protein;


/// Convert a text field into an `f32`.
///
/// Leading and trailing whitespace (spaces, `\n`, ...) is trimmed before
/// the conversion is attempted.
///
/// # Errors
/// If the trimmed text is not a valid float, the parse error is printed
/// on stdout and `0.0` is returned instead.
///
fn parse_float(s: &str) -> f32 {
    s.trim().parse::<f32>().unwrap_or_else(|err| {
        println!("{}", err);
        0.0
    })
}

/// Convert a text field into an `i64`.
///
/// Leading and trailing whitespace (spaces, `\n`, ...) is trimmed first.
/// The text is read as a decimal number; only when that fails is it read
/// as hexadecimal. In large PDB files the atom number can exceed 99,999,
/// and VMD writes such numbers in hexadecimal.
/// Because decimal is tried first, a field made only of the digits 0-9 is
/// always interpreted as decimal.
///
/// # Errors
/// If the text is neither a valid decimal nor a valid hexadecimal number,
/// the decimal parse error is printed on stdout and `0` is returned.
///
fn parse_int(s: &str) -> i64 {
    let text = s.trim();

    // Keep the decimal error around: it is the one reported when the
    // hexadecimal fallback fails as well.
    let decimal_err = match text.parse::<i64>() {
        Ok(value) => return value,
        Err(err) => err,
    };

    if let Ok(value) = i64::from_str_radix(text, 16) {
        return value;
    }

    println!("{}", decimal_err);
    0
}

/// Parse a PDB file and return the protein structure it describes.
///
/// Only `ATOM` and `HETATM` records whose residue name is one of the known
/// protein residues are kept; every other line is ignored. The fields are
/// read from fixed byte columns of each record.
///
/// # Arguments
/// * `pdb` - path of the PDB file to read
/// * `name` - name given to the returned `Protein`
///
/// # Errors
/// If the file cannot be opened, a message is printed and the process exits
/// with status 1. An `ATOM`/`HETATM` record that is too short (or not ASCII)
/// to hold the coordinate columns is reported on stdout and skipped.
/// Numeric fields that cannot be parsed are reported by the parsing helpers
/// and replaced by zero.
///
/// # Panics
/// Panics if a line cannot be read from the file (I/O error or invalid
/// UTF-8).
///
/// # Examples
/// ```
/// use pdbparser;
/// let my_prot = pdbparser::parse_pdb("tests/tests_file/f2.pdb", "f2");
/// assert_eq!(66, my_prot.get_number_residue());
/// ```
pub fn parse_pdb(pdb: &str, name: &str) -> Protein {
    // Residue names accepted as protein residues (compared upper-cased).
    const PROTEIN_RESIDUES: &[&str] = &[
        "ARG", "HIS", "HSE", "HSD", "LYS", "ASP", "GLU", "SER", "THR", "ASN", "GLN", "CYS",
        "SEC", "GLY", "PRO", "ALA", "VAL", "ILE", "LEU", "MET", "PHE", "TYR", "TRP",
    ];
    // The last column read below (z coordinate) ends at byte 54.
    const RECORD_MIN_LEN: usize = 54;

    // Check if the file exists and/or can be read
    let file = match File::open(pdb) {
        Ok(f) => f,
        Err(e) => {
            println!("Could not open the file \"{}\"\nError: {}", pdb, e);
            process::exit(1);
        }
    };

    let reader = BufReader::new(file);
    let mut protein = Protein::new(name.to_string());

    for line in reader.lines() {
        let l = line.expect("could not read a line from the PDB file");
        if !(l.starts_with("HETATM") || l.starts_with("ATOM")) {
            continue;
        }

        // Guard the fixed-column slicing: the record must reach the end of
        // the z column, and being ASCII guarantees that every byte offset
        // used below is a valid char boundary.
        if l.len() < RECORD_MIN_LEN || !l.as_bytes()[..RECORD_MIN_LEN].is_ascii() {
            println!("Skipping malformed PDB record: \"{}\"", l);
            continue;
        }

        // First get the residue name: lines that do not belong to a protein
        // residue are skipped before any other field is parsed.
        let residue_name = l[17..20].trim();
        if !is_protein_res(residue_name, PROTEIN_RESIDUES) {
            continue;
        }

        // NOTE(review): bytes 12..17 also cover column 17, which is the
        // alternate-location indicator in the wwPDB layout - confirm that
        // including it in the atom name is intended.
        let atom_name = l[12..17].trim().to_string();
        // Single ASCII byte (checked above), so the conversion is lossless.
        let chain = char::from(l.as_bytes()[21]);
        let atom_number = parse_int(&l[6..11]);
        let residue_number = parse_int(&l[22..26]);
        let x = parse_float(&l[30..38]);
        let y = parse_float(&l[38..46]);
        let z = parse_float(&l[46..54]);

        // Add the atom to the protein.
        // NOTE(review): negative atom/residue numbers wrap around when cast
        // to u64.
        protein.update_protein(
            chain,
            residue_name.to_string(),
            residue_number as u64,
            atom_name,
            atom_number as u64,
            [x, y, z],
        );
    }
    protein
}
/// Tell whether a residue name belongs to a list of known residue names.
///
/// `r` is upper-cased before the comparison, so the match is
/// case-insensitive with respect to `r`; the entries of `lst` are compared
/// as they are.
///
fn is_protein_res(r: &str, lst: &[&str]) -> bool {
    let wanted = r.to_uppercase();
    lst.iter().any(|&known| wanted == known)
}