From 08abd456b8c065b842d3d8de68dbc78ff690a4f2 Mon Sep 17 00:00:00 2001
From: Jonathan Rouzaud-Cornabas <jonathan.rouzaud-cornabas@inria.fr>
Date: Mon, 19 Feb 2018 14:49:28 +0100
Subject: [PATCH] working (but not well) openmp code for SIMD Individual

---
 CMakeLists.txt                   |   2 -
 src/libaevol/BitSet_SIMD.h       |  21 +++++-
 src/libaevol/SIMD_Individual.cpp | 115 +++++++++++++++++++++----------
 3 files changed, 99 insertions(+), 39 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index c4abbfd8d..6e7af6043 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -3,8 +3,6 @@ project(raevol_cuda)
 
 set(__OPENMP_GPU)
 
-add_definitions(-DWITH_BITSET)
-add_definitions(-D_STATIC_BITSET)
 set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11")
 
 set(SOURCE_FILES main.cpp src/libaevol/ProxyIndividual.cpp src/libaevol/ProxyIndividual.h src/libaevol/algorithm_cuda.cpp src/libaevol/algorithm_cuda.h src/generate_lookup_map_pow.cpp src/regulation_network.cpp src/ode_solver_only.cpp src/ode_solver_only.h src/ode_solver_only_gpu.h src/ode_solver_only_omp_gpu.cpp src/ode_solver_only_omp_gpu.h src/rk_solver.cpp src/rk_solver.h src/libaevol/CUDA_Individual_cu.h src/libaevol/SIMD_Individual.cpp src/libaevol/SIMD_Individual.h src/libaevol/Dna_SIMD.cpp src/libaevol/Dna_SIMD.h src/libaevol/MutationEvent.cpp src/libaevol/MutationEvent.h src/libaevol/DnaMutator.cpp src/libaevol/DnaMutator.h src/libaevol/Stats_SIMD.cpp src/libaevol/Stats_SIMD.h src/libaevol/BitSet_SIMD.cpp src/libaevol/BitSet_SIMD.h)
diff --git a/src/libaevol/BitSet_SIMD.h b/src/libaevol/BitSet_SIMD.h
index e984dcede..8078268c7 100644
--- a/src/libaevol/BitSet_SIMD.h
+++ b/src/libaevol/BitSet_SIMD.h
@@ -11,6 +11,10 @@
 #include <cstdio>
 #include <bitset>
 
+#ifdef _DYNAMIC_BITSET
+#include <boost/dynamic_bitset.hpp>
+#endif
+
 namespace aevol {
 
 #define BITSET_BLOCK_SIZE INT32_C(1024)
@@ -78,6 +82,8 @@ class BitSet_SIMD {
       data_[(position / CHAR_TO_BITS)] ^= (1 << (position % CHAR_TO_BITS));
 #elif _STATIC_BITSET
       data_[position].flip();
+#elif _DYNAMIC_BITSET
+      data_[position].flip();
 #endif
     }
 
@@ -234,13 +240,24 @@ class BitSet_SIMD {
       return dist[0] && dist[1] && dist[2] && dist[3];
     }
 
-    __inline bool is_shine_dalgarno_protein_start(bool LEADING, int pos) {
+    __inline bool is_shine_dalgarno_protein_start(bool LEADING, int32_t pos) {
 
       bool start[9] = {false, false, false,
                        false, false, false,
                        false, false, false};
 
 #ifdef _STATIC_BITSET
+#pragma omp simd
+      for (int32_t k = 0; k < 9; k++) {
+        int32_t k_t = k >= 6 ? k + 4 : k;
+        if (LEADING)
+          (data_[pos + k_t >= length_ ? pos + k_t - length_ : pos + k_t] ==
+           get(BITSET_SHINE_DAL_SEQ_LEAD,k)) ? (start[k] = true) : (start[k] = false);
+        else
+          (data_[pos - k_t < 0 ? length_ + (pos - k_t) : pos - k_t] ==
+           get(BITSET_SHINE_DAL_SEQ_LAG,k)) ? (start[k] = true) : (start[k] = false);
+      }
+      /*
       if (LEADING) {
         (data_[pos + 0 >= length_ ? pos + 0 - length_ : pos + 0] ==
          get(BITSET_SHINE_DAL_SEQ_LEAD, 0)) ? (start[0] = true)
@@ -299,7 +316,7 @@ class BitSet_SIMD {
         (data_[pos - 12 < 0 ? length_ + (pos - 12) : pos - 12] ==
          get(BITSET_SHINE_DAL_SEQ_LAG, 8)) ? (start[8] = true)
                                            : (start[8] = false);
-      }
+      }*/
 #endif
 
 #ifdef _DYNAMIC_CUSTOM_BITSET
diff --git a/src/libaevol/SIMD_Individual.cpp b/src/libaevol/SIMD_Individual.cpp
index d2b10f3d8..708e4e4aa 100644
--- a/src/libaevol/SIMD_Individual.cpp
+++ b/src/libaevol/SIMD_Individual.cpp
@@ -442,8 +442,14 @@ void SIMD_Individual::start_stop_RNA() {
               if (indiv_id == 6)
                 printf("Adding promoters %d at %d\n",dna_pos,prom_idx);*/
 
-              internal_simd_struct[indiv_id]->promoters[prom_idx] = nprom;
-              internal_simd_struct[indiv_id]->leading_prom_pos[dna_pos] = prom_idx;
+#pragma omp critical(promoters)
+              {
+                internal_simd_struct[indiv_id]->promoters[prom_idx] = nprom;
+              }
+#pragma omp critical(add_to_promoters_lead)
+              {
+                internal_simd_struct[indiv_id]->leading_prom_pos[dna_pos] = prom_idx;
+              }
             }
           }
 
@@ -506,8 +512,14 @@ void SIMD_Individual::start_stop_RNA() {
                     internal_simd_struct[indiv_id]->count_prom + 1;
               }
 
-              internal_simd_struct[indiv_id]->promoters[prom_idx] = nprom;
-              internal_simd_struct[indiv_id]->lagging_prom_pos[dna_pos] = prom_idx;
+#pragma omp critical(promoters)
+              {
+                internal_simd_struct[indiv_id]->promoters[prom_idx] = nprom;
+              }
+#pragma omp critical(add_to_promoters_lags)
+              {
+                internal_simd_struct[indiv_id]->lagging_prom_pos[dna_pos] = prom_idx;
+              }
             }
           }
 
@@ -538,14 +550,26 @@ void SIMD_Individual::start_stop_RNA() {
 void SIMD_Individual::opt_prom_compute_RNA() {
 
   int nb_indiv = exp_m_->nb_indivs();
-#pragma omp parallel
-#pragma omp single
+
+#pragma omp barrier
+
   for (int indiv_id = 0; indiv_id < nb_indiv; indiv_id++) {
     if (exp_m_->dna_mutator_array_[indiv_id]->hasMutate()) {
       internal_simd_struct[indiv_id]->proteins.clear();
       internal_simd_struct[indiv_id]->rnas.clear();
       internal_simd_struct[indiv_id]->terminator_lead.clear();
       internal_simd_struct[indiv_id]->terminator_lag.clear();
+    }
+  }
+
+#pragma omp parallel
+#pragma omp single
+  for (int indiv_id = 0; indiv_id < nb_indiv; indiv_id++) {
+    if (exp_m_->dna_mutator_array_[indiv_id]->hasMutate()) {
+      /*internal_simd_struct[indiv_id]->proteins.clear();
+      internal_simd_struct[indiv_id]->rnas.clear();
+      internal_simd_struct[indiv_id]->terminator_lead.clear();
+      internal_simd_struct[indiv_id]->terminator_lag.clear();*/
 
       for (int rna_idx = 0; rna_idx <
                             (int) internal_simd_struct[indiv_id]->promoters.size();
@@ -553,13 +577,29 @@ void SIMD_Individual::opt_prom_compute_RNA() {
 
 #pragma omp task firstprivate(indiv_id, rna_idx)
         {
-          if (internal_simd_struct[indiv_id]->promoters[rna_idx] != nullptr) {
+          promoterStruct* prom;
+#pragma omp critical(promoters)
+          {
+            prom = internal_simd_struct[indiv_id]->promoters[rna_idx];
+          }
+
+          if (prom != nullptr) {
+            int prom_pos;
+            bool lead_lag;
+            double prom_error;
+#pragma omp critical(promoters)
+            {
+              prom_pos = internal_simd_struct[indiv_id]->promoters[rna_idx]->pos;
+              lead_lag = internal_simd_struct[indiv_id]->promoters[rna_idx]->leading_or_lagging;
+              prom_error = fabs(
+                  ((float) internal_simd_struct[indiv_id]->promoters[rna_idx]->error));
+            }
 
-            if (internal_simd_struct[indiv_id]->promoters[rna_idx]->leading_or_lagging) {
+            if (lead_lag) {
 //        if (indiv_id == 152) printf("Searching for RNA (OPT) for indiv %d RNA %d LEAD\n",indiv_id,rna_idx);
               /* Search for terminators */
               int cur_pos =
-                  internal_simd_struct[indiv_id]->promoters[rna_idx]->pos + 22;
+                  prom_pos + 22;
               cur_pos = cur_pos >= dna_size[indiv_id] ? cur_pos -
                                                         dna_size[indiv_id] :
                         cur_pos;
@@ -579,8 +619,8 @@ void SIMD_Individual::opt_prom_compute_RNA() {
 
                 if (is_term)
 #else
+                  #pragma omp simd aligned(internal_simd_struct[indiv_id]->dna_->data_:64)
                   for (int t_motif_id = 0; t_motif_id < 4; t_motif_id++)
-                  // LEADING
                     term_dist_leading +=
                         internal_simd_struct[indiv_id]->dna_->data_[cur_pos + t_motif_id >= dna_size[indiv_id] ? cur_pos +
                                                                          t_motif_id -
@@ -627,34 +667,34 @@ void SIMD_Individual::opt_prom_compute_RNA() {
                 }*/
                 int32_t rna_length = 0;
 
-                if (internal_simd_struct[indiv_id]->promoters[rna_idx]->pos
+                if (prom_pos
                     > rna_end)
                   rna_length = dna_size[indiv_id] -
-                               internal_simd_struct[indiv_id]->promoters[rna_idx]->pos
+                      prom_pos
                                + rna_end;
                 else
-                  rna_length = rna_end - internal_simd_struct[indiv_id]->
-                      promoters[rna_idx]->pos;
+                  rna_length = rna_end - prom_pos;
 
                 rna_length -= 21;
 
                 if (rna_length > 0) {
 #pragma omp critical(rnas)
-                  internal_simd_struct[indiv_id]->rnas.emplace_back(
-                      internal_simd_struct[indiv_id]->promoters[rna_idx]->pos,
-                      rna_end,
-                      !internal_simd_struct[indiv_id]->promoters[rna_idx]->leading_or_lagging,
-                      1.0 -
-                      fabs(
-                          ((float) internal_simd_struct[indiv_id]->promoters[rna_idx]->error)) /
-                      5.0, rna_length);
+                  {
+                    internal_simd_struct[indiv_id]->rnas.emplace_back(
+                        prom_pos,
+                        rna_end,
+                        !lead_lag,
+                        1.0 -
+                        prom_error /
+                        5.0, rna_length);
+                  }
                 }
               }
 //        if (indiv_id == 152) printf("Hop to next\n");
             } else {
               /* Search for terminator */
               int cur_pos =
-                  internal_simd_struct[indiv_id]->promoters[rna_idx]->pos - 22;
+                  prom_pos - 22;
               cur_pos = cur_pos < 0 ? dna_size[indiv_id] + (cur_pos) : cur_pos;
               int start_pos = cur_pos;
               bool terminator_found = false;
@@ -671,7 +711,8 @@ void SIMD_Individual::opt_prom_compute_RNA() {
 
                 if (is_term)
 #else
-                  for (int t_motif_id = 0; t_motif_id < 4; t_motif_id++) {
+                #pragma omp simd aligned(internal_simd_struct[indiv_id]->dna_->data_:64)
+                for (int t_motif_id = 0; t_motif_id < 4; t_motif_id++) {
                     term_dist_lagging +=
                         internal_simd_struct[indiv_id]->dna_->data_[cur_pos - t_motif_id < 0 ? cur_pos -
                                                                         t_motif_id +
@@ -730,14 +771,14 @@ void SIMD_Individual::opt_prom_compute_RNA() {
                 }*/
                 int32_t rna_length = 0;
 
-                if (internal_simd_struct[indiv_id]->promoters[rna_idx]->pos <
+                if (prom_pos <
                     rna_end)
                   rna_length =
-                      internal_simd_struct[indiv_id]->promoters[rna_idx]->pos +
+                      prom_pos +
                       dna_size[indiv_id] - rna_end;
                 else
                   rna_length =
-                      internal_simd_struct[indiv_id]->promoters[rna_idx]->pos -
+                      prom_pos -
                       rna_end;
 
                 rna_length -= 21;
@@ -745,14 +786,15 @@ void SIMD_Individual::opt_prom_compute_RNA() {
                 if (rna_length >= 0) {
 
 #pragma omp critical(rnas)
-                  internal_simd_struct[indiv_id]->rnas.emplace_back(
-                      internal_simd_struct[indiv_id]->promoters[rna_idx]->pos,
-                      rna_end,
-                      !internal_simd_struct[indiv_id]->promoters[rna_idx]->leading_or_lagging,
-                      1.0 -
-                      fabs(
-                          ((float) internal_simd_struct[indiv_id]->promoters[rna_idx]->error)) /
-                      5.0, rna_length);
+                  {
+                    internal_simd_struct[indiv_id]->rnas.emplace_back(
+                        prom_pos,
+                        rna_end,
+                        !lead_lag,
+                        1.0 -
+                        prom_error /
+                        5.0, rna_length);
+                  }
                 }
 //        if (indiv_id == 152) printf("Hop to next\n");
 
@@ -970,6 +1012,8 @@ void SIMD_Individual::start_protein() {
               start = internal_simd_struct[indiv_id]->dna_->bitset_->is_shine_dalgarno_protein_start(
                   true, c_pos);
 #else
+
+              #pragma omp simd aligned(internal_simd_struct[indiv_id]->dna_->data_,SHINE_DAL_SEQ_LEAD:64)
               for (int k = 0; k < 9; k++) {
                 k_t = k >= 6 ? k + 4 : k;
                 t_pos = c_pos + k_t >= dna_size[indiv_id] ? c_pos + k_t -
@@ -992,6 +1036,7 @@ void SIMD_Individual::start_protein() {
               start = internal_simd_struct[indiv_id]->dna_->bitset_->is_shine_dalgarno_protein_start(
                   false, c_pos);
 #else
+#pragma omp simd aligned(internal_simd_struct[indiv_id]->dna_->data_,SHINE_DAL_SEQ_LAG:64)
               for (int k = 0; k < 9; k++) {
                 k_t = k >= 6 ? k + 4 : k;
                 t_pos =
-- 
GitLab