#!/bin/bash
#
# consensus_on_all_clusters.sh
#
# Provenance (recovered from a git patch exported by GitLab):
#   Commit:  5334ce4c53de1bc9c20e42c7f959362613384e05
#   From:    oboulle <olivier.boulle@inria.fr>
#   Date:    Fri, 4 Oct 2024 17:30:58 +0200
#   Subject: [PATCH] init
#   File:    consensus_on_all_clusters.sh (new file, mode 100755, blob b03fb93)
#
# Purpose:
#   Concatenate every per-cluster consensus FASTA file found in the consensus
#   directory into a single FASTA file, then count how many consensus sequence
#   lines are exactly identical to a sequence line of the reference FASTA file.
#
# Outputs:
#   $consensus_dir/0_full_consensus.fasta  concatenation of all consensus files
#   $consensus_dir/correct_consensus.txt   sequence lines common to both sets
#   stdout                                 summary line with both counts
#
# Usage: run from the directory that contains "partitioning/"; no arguments.

set -euo pipefail

#######################################
# Print an error message on stderr and abort the script.
# Arguments: $* - message
#######################################
die() {
  printf 'error: %s\n' "$*" >&2
  exit 1
}

#######################################
# Print every line of a FASTA file that is not a header (does not start
# with '>').
# Arguments: $1 - path of the FASTA file
# Outputs:   the non-header lines on stdout
# Returns:   0 on success (even when nothing is printed), grep's status on
#            a real error
#######################################
extract_sequences() {
  local status=0
  grep -v '^>' -- "$1" || status=$?
  # grep exits with 1 when it selects no line; that is not an error here,
  # only a status above 1 is.
  (( status <= 1 )) || return "$status"
  return 0
}

echo "______________ consensus on all clusters ______________"

tests_dir="partitioning/new_tests/100k"

ref_file="$tests_dir/references_100k.fasta"
clusters_dir="$tests_dir/clusters_100k"
consensus_dir="$tests_dir/consensus_100k"
full_consensus="$consensus_dir/0_full_consensus.fasta"
correct_consensus="$consensus_dir/correct_consensus.txt"

# Parameters of the per-cluster consensus step; they (and clusters_dir) are
# only referenced by the disabled loop below.
consensus_script="reads_consensus_class.py"
start_primer="GTTCTACAGTCCGACGATCC"
stop_primer="TTGGCACCCGAGAATTCCAC"
kmer_size="21"

# Disabled step: run the consensus script on every cluster file.
#for file in "$clusters_dir"/*.fasta; do
#  cluster_name=$(basename -- "$file")
#  python3 "$consensus_script" -i "$file" -o "$consensus_dir/$cluster_name" --start "$start_primer" --stop "$stop_primer" --kmer_size "$kmer_size" --min_occ 2 --seq_size 200
#done

[[ -r "$ref_file" ]] || die "reference file not readable: $ref_file"
[[ -d "$consensus_dir" ]] || die "consensus directory not found: $consensus_dir"

# Collect the per-cluster consensus files. The concatenated output lives in
# the same directory and matches the same glob, so it is skipped explicitly:
# otherwise a second run would feed the previous output back into itself.
shopt -s nullglob
consensus_files=()
for fasta in "$consensus_dir"/*.fasta; do
  if [[ "${fasta##*/}" != "${full_consensus##*/}" ]]; then
    consensus_files+=("$fasta")
  fi
done
shopt -u nullglob
(( ${#consensus_files[@]} > 0 )) || die "no consensus .fasta file in $consensus_dir"

cat -- "${consensus_files[@]}" > "$full_consensus"

# Intermediate files go to a private temporary directory that is removed on
# every exit path, including failures.
work_dir=$(mktemp -d) || die "mktemp failed"
cleanup() { rm -rf -- "${work_dir:?}"; }
trap cleanup EXIT

sorted_ref="$work_dir/sorted_ref_sequences.txt"
sorted_consensus="$work_dir/sorted_consensus_sequences.txt"

# Get the sequences from the references and from the consensuses, sorted
# because comm requires sorted input.
extract_sequences "$ref_file" | sort > "$sorted_ref"
extract_sequences "$full_consensus" | sort > "$sorted_consensus"

# -12 suppresses the lines unique to either file: only common lines remain.
comm -12 -- "$sorted_ref" "$sorted_consensus" > "$correct_consensus"

num_common_sequences=$(wc -l < "$correct_consensus")
num_total_ref=$(wc -l < "$sorted_ref")

echo "number of correct consensuses found : $num_common_sequences out of a total of $num_total_ref referrences"

#-------------- Exit --------------#
echo "___END !___"

exit 0

# scp -r oboulle@dnarxiv.irisa.fr:~/Documents/result_analysis/fragment_assembly_analysis/results/demultiplexed_v2 ~/Documents/result_analysis/fragment_assembly_analysis/results