Commit 7f933d17 authored by Mikaël Salson, committed by Mathieu Giraud

tools.js: get_codons and get_mutations

Those two functions are useful to detect synonymous mutations.
We also have fallbacks in case we don't want to split the
sequence in codons.

See #2056
parent e4858e93
// Mutation types stored as values in the dictionary returned by get_mutations().
// Substitution for which both codons map to the same tableAA entry.
var SILENT="silent";
// Any other nucleotide substitution.
var SUBST="substitution";
// Nucleotide present in the sequence but not in the reference (gap in the reference).
var INS="insertion";
// Nucleotide present in the reference but not in the sequence (gap in the sequence).
var DEL="deletion";
/**
 * Split two aligned sequences into codons, following the reading frame of
 * the reference.
 *
 * Columns where both sequences have a gap ('-') are dropped. In every other
 * column the reference chunk and the sequence chunk receive exactly one
 * character each (a nucleotide or a '-'), so that ref[i] and seq[i] always
 * have the same length and can be compared position by position.
 *
 * @pre both sequences are aligned together: ref.length == seq.length
 * @param ref: the reference sequence
 * @param seq: the sequence aligned to ref
 * @param frame: the frame in the reference sequence
 *               0: first codon starts at first position, etc.
 *               When undefined (or null), no splitting is done and both
 *               sequences are returned untouched as a single chunk.
 * @return an object with a property ref and a property seq whose values are
 *         lists of chunks. Chunks holding a complete codon contain 3
 *         reference nucleotides, possibly with extra '-' in ref where seq has
 *         an insertion. The first and last chunks may be partial codons, and
 *         insertions located between two codons get a chunk of their own.
 */
function get_codons(ref, seq, frame) {
    // Fallback: without a frame the sequences cannot be cut into codons.
    if (frame == undefined) {
        return {ref: [ref], seq: [seq]};
    }

    var codons_ref = [];
    var codons_seq = [];
    var current_codon_ref = '';
    var current_codon_seq = '';
    var pos = 0;
    // Number of reference nucleotides still belonging to the leading partial
    // codon (kept in a local so that the parameter is not modified).
    var remaining = frame;

    // Leading partial codon: consume columns until `frame` reference
    // nucleotides have been seen. Gaps are recorded exactly as in the main
    // loop below so that both chunks stay aligned.
    for (; pos < ref.length; pos++) {
        if (ref[pos] != '-') {
            if (remaining == 0)
                break;
            current_codon_ref += ref[pos];
            current_codon_seq += seq[pos];
            remaining--;
        } else if (seq[pos] != '-') {
            // Insertion in seq: mark it with a gap in the reference chunk.
            current_codon_ref += '-';
            current_codon_seq += seq[pos];
        }
        // Gap in both sequences: nothing to record.
    }
    if (current_codon_seq != '' || current_codon_ref != '') {
        codons_ref.push(current_codon_ref);
        codons_seq.push(current_codon_seq);
        current_codon_ref = '';
        current_codon_seq = '';
    }

    // Number of reference nucleotides in the chunk being built.
    var nb_nuc = 0;
    for (; pos < ref.length; pos++) {
        // Close the current chunk when the codon is complete, or when a new
        // codon starts right after insertions located between two codons.
        if (nb_nuc == 3 ||
            (ref[pos] != '-' && current_codon_seq.length > 0 &&
             nb_nuc == 0)) {
            codons_ref.push(current_codon_ref);
            codons_seq.push(current_codon_seq);
            current_codon_ref = '';
            current_codon_seq = '';
            nb_nuc = 0;
        }
        if (ref[pos] == '-') {
            if (seq[pos] != '-') {
                current_codon_seq += seq[pos];
                current_codon_ref += '-';
            }
        } else {
            current_codon_ref += ref[pos];
            current_codon_seq += seq[pos];
            nb_nuc++;
        }
    }

    // Trailing (possibly partial) codon. Both chunks grow together in the
    // loop above, so they are pushed together.
    if (current_codon_ref.length > 0 || current_codon_seq.length > 0) {
        codons_ref.push(current_codon_ref);
        codons_seq.push(current_codon_seq);
    }
    return {ref : codons_ref, seq : codons_seq};
}
/**
 * Get positions of mutations and their type between two aligned sequences
 * @pre both sequences are aligned together
 * @param ref: the reference sequence
 * @param seq: the sequence aligned to ref
 * @param frame: the frame in the reference sequence
 *               0: first codon starts at first position, etc.
 *               When undefined, the sequences are not split in codons and a
 *               substitution is never reported as SILENT.
 * @return a dictionary whose keys are positions of mutations in the alignment
 *         and whose values are a type of mutation either SUBST/SILENT/INS/DEL.
 *         Columns where both sequences have a gap are not counted in the
 *         positions, whether a frame is given or not.
 */
function get_mutations(ref, seq, frame) {
    var codons = get_codons(ref, seq, frame);
    var mutations = {};
    var nb_pos = 0;
    for (var i = 0; i < codons.ref.length; i++) {
        var codon_ref = codons.ref[i];
        var codon_seq = codons.seq[i];
        for (var p = 0; p < codon_ref.length; p++) {
            var nuc_ref = codon_ref[p];
            var nuc_seq = codon_seq[p];
            // A gap shared by both sequences is not a mutation and must not
            // shift the positions. This only occurs in the no-frame fallback,
            // since get_codons drops such columns when splitting in codons.
            if (nuc_ref == '-' && nuc_seq == '-')
                continue;
            if (nuc_ref != nuc_seq) {
                if (nuc_ref == '-') {
                    mutations[nb_pos] = INS;
                } else if (nuc_seq == '-') {
                    mutations[nb_pos] = DEL;
                } else if (codon_seq.length == 3 &&
                           codon_ref.length == 3 &&
                           frame != undefined &&
                           tableAA.hasOwnProperty(codon_seq) &&
                           tableAA[codon_seq] == tableAA[codon_ref]) {
                    // Two complete, gap-free codons with the same tableAA
                    // entry (tableAA is defined elsewhere; presumably the
                    // codon -> amino acid table).
                    mutations[nb_pos] = SILENT;
                } else {
                    mutations[nb_pos] = SUBST;
                }
            }
            nb_pos++;
        }
    }
    return mutations;
}
/**
* Find the position of the nth occurrence of needle
*
......
// Register the "Tools" QUnit module; the tests declared below belong to it.
// The options object is empty: no hooks are configured here.
QUnit.module("Tools", {
});
// Checks how get_codons splits aligned sequences for each reading frame,
// first without gaps, then with insertions and a deletion in the alignment.
QUnit.test("test get_codons", function(assert) {
    var ungapped_ref = 'ATGATAGAC';
    var ungapped_seq = 'AAACCCGGG';
    var gapped_ref = 'ATG--ATAGACAG';
    var gapped_seq = 'AAACCCG-GGGTT';

    // One entry per call: input sequences, frame and expected split.
    var expectations = [
        {ref: ungapped_ref, seq: ungapped_seq, frame: 0,
         codons: {ref : ['ATG', 'ATA', 'GAC'],
                  seq : ['AAA', 'CCC', 'GGG']}},
        {ref: ungapped_ref, seq: ungapped_seq, frame: 1,
         codons: {ref : ['A', 'TGA', 'TAG', 'AC'],
                  seq : ['A', 'AAC', 'CCG', 'GG']}},
        {ref: ungapped_ref, seq: ungapped_seq, frame: 2,
         codons: {ref : ['AT', 'GAT', 'AGA', 'C'],
                  seq : ['AA', 'ACC', 'CGG', 'G']}},
        {ref: gapped_ref, seq: gapped_seq, frame: 0,
         codons: {ref : ['ATG', '--', 'ATA', 'GAC', 'AG'],
                  seq : ['AAA', 'CC', 'CG-', 'GGG', 'TT']}},
        {ref: gapped_ref, seq: gapped_seq, frame: 1,
         codons: {ref : ['A', 'TG--A', 'TAG', 'ACA', 'G'],
                  seq : ['A', 'AACCC', 'G-G', 'GGT', 'T']}},
        {ref: gapped_ref, seq: gapped_seq, frame: 2,
         codons: {ref : ['AT', 'G--AT', 'AGA', 'CAG'],
                  seq : ['AA', 'ACCCG', '-GG', 'GTT']}}
    ];

    expectations.forEach(function(expected) {
        var actual = get_codons(expected.ref, expected.seq, expected.frame);
        assert.deepEqual(actual, expected.codons);
    });
});
// Checks the positions and types reported by get_mutations for every reading
// frame, with and without gaps in the alignment, and without any frame.
QUnit.test("test get_mutations", function(assert) {
    var r = 'ATAGATAGATAG';

    // Identical sequences: no mutation whatever the frame
    var mutations = get_mutations(r, r, 0);
    assert.equal (Object.keys(mutations).length, 0, "No mutation");

    mutations = get_mutations(r, r, 1);
    assert.equal (Object.keys(mutations).length, 0, "No mutation");

    mutations = get_mutations(r, r, 2);
    assert.equal (Object.keys(mutations).length, 0, "No mutation");

    // GAT > GAc (give same D AA)
    var s = 'ATAGACAGATAG';
    //       0123456789
    mutations = get_mutations(r, s, 0);
    assert.equal (Object.keys(mutations).length, 1, "Single mutation");
    assert.equal (mutations[5], SILENT, "Silent mutation");

    // ATA > AcA (I > T)
    mutations = get_mutations(r, s, 1);
    assert.equal (Object.keys(mutations).length, 1, "Single mutation");
    assert.equal (mutations[5], SUBST, "Substitution");

    // TAG > cAG (* > Q)
    mutations = get_mutations(r, s, 2);
    assert.equal (Object.keys(mutations).length, 1, "Single mutation");
    assert.equal (mutations[5], SUBST, "Substitution");

    r = 'ATAGATAG-TAG';
    s = 'ATA-ATCGATAG';
    //   0123456789

    // AG-T > cGAT (no base in the reference → can't tell if the mutation is silent)
    mutations = get_mutations(r, s, 0);
    assert.equal(Object.keys(mutations).length, 3, "Three mutations");
    assert.deepEqual(mutations, {3 : DEL, 6 : SUBST, 8 : INS});

    // ATA > ATc (I)
    mutations = get_mutations(r, s, 1);
    assert.equal(Object.keys(mutations).length, 3, "Three mutations");
    assert.deepEqual(mutations, {3 : DEL, 6 : SILENT, 8 : INS});

    // TAG > TcG (* > S)
    mutations = get_mutations(r, s, 2);
    assert.equal(Object.keys(mutations).length, 3, "Three mutations");
    assert.deepEqual(mutations, {3 : DEL, 6 : SUBST, 8 : INS});

    // Same example as before but with common indels to check that they are ignored:
    // the expected positions do not count columns where both sequences have a gap.
    r = 'ATAGAT-AG-TA--G';
    s = 'ATA-AT-CGATA--G';

    // AG-T > cGAT
    mutations = get_mutations(r, s, 0);
    assert.equal(Object.keys(mutations).length, 3, "Three mutations");
    assert.deepEqual(mutations, {3 : DEL, 6 : SUBST, 8 : INS});

    // ATA > ATC (I)
    mutations = get_mutations(r, s, 1);
    assert.equal(Object.keys(mutations).length, 3, "Three mutations");
    assert.deepEqual(mutations, {3 : DEL, 6 : SILENT, 8 : INS});

    // TAG > TcG (* > S)
    mutations = get_mutations(r, s, 2);
    assert.equal(Object.keys(mutations).length, 3, "Three mutations");
    assert.deepEqual(mutations, {3 : DEL, 6 : SUBST, 8 : INS});

    // No frame: substitutions can never be classified as silent
    mutations = get_mutations(r, s);
    assert.equal(Object.keys(mutations).length, 3, "Three mutations without phase");
    assert.deepEqual(mutations, {3 : DEL, 6 : SUBST, 8 : INS});
});
QUnit.test("test nth_ocurrence", function(assert) {
var str = "needle needle needle needle";
var m = nth_ocurrence(str, 'n', 3);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment