diff --git a/CMakeLists.txt b/CMakeLists.txt
index 938b126402673a53adbad5601573c1ed6e57c715..0f187ceb21e68302b1c4e8256e9da0b75a6e6777 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -83,6 +83,8 @@ endif()
 # ========================
 find_library(M_LIBRARIES m)
 
+find_package(Threads REQUIRED)
+
 # ========================
 # Warning flags
 # ========================
@@ -111,6 +113,10 @@ if ( M_LIBRARIES )
   target_link_libraries(test_FEMBEM PUBLIC ${M_LIBRARIES} )
 endif()
 
+if(CMAKE_THREAD_LIBS_INIT)  # thread library reported by find_package(Threads) above; nothing extra is linked when the variable is empty
+  target_link_libraries(test_FEMBEM PUBLIC ${CMAKE_THREAD_LIBS_INIT})
+endif()
+
 # ========================
 # INSTALL
 # ========================
diff --git a/include/main.h b/include/main.h
index 884debecdf2bd84356e51560e3dc0b1d59483631..a6d1b12d2d723e96dd31fb32aa657c6c498c4ddb 100644
--- a/include/main.h
+++ b/include/main.h
@@ -3,6 +3,9 @@
 #include <math.h>
 #include <complex.h>
 #include <errno.h>
+#include <stdbool.h>
+#include <limits.h>
+#include <pthread.h>
 #include "hmat/hmat.h"
 #include "util.h"
 
@@ -12,10 +15,12 @@ extern char * batch_output;
 
 extern double computation_time;
 
-extern double ram_usage_peak;
+extern size_t ram_usage_peak;
 
 extern double solution_relative_error;
 
+extern bool is_parent_running;
+
 extern hmat_interface_t * interface;
 
 extern double epsilon;
@@ -73,6 +78,8 @@ double compression_to_epsilon(const char * compression);
 int run_one();
 int run_batch();
 void reset_meters();
+int get_vmrss(FILE * input, size_t * vmrss); /* stores in *vmrss the number read from the "VmRSS:" line of `input` */
+void * rss_loop(void * __dummy); /* pthread start routine: tracks the VmRSS peak in ram_usage_peak while is_parent_running is set; argument unused */
 void prepare_hmat(int, int, int, int, int*, int*, int*, int*, void*, hmat_block_info_t *);
 void advanced_compute_hmat(struct hmat_block_compute_context_t*);
 void update_progress(hmat_progress_t * ctx);
diff --git a/src/main.c b/src/main.c
index 1bd85b25ab924e19fd0568af95d6a6baa3d0592f..d0759161076271a3a826e9937a89cc86d9a76532 100644
--- a/src/main.c
+++ b/src/main.c
@@ -3,8 +3,9 @@
 char * batch_input = NULL;
 char * batch_output = NULL;
 double computation_time = 0.;
-double ram_usage_peak = 0.;
+size_t ram_usage_peak = 0UL; /* largest VmRSS value sampled by rss_loop(); divided by 1024 when it is printed */
 double solution_relative_error = 0.;
+bool is_parent_running = false; /* set by run_one() before the computation and cleared after it; rss_loop() samples while it is true */
 hmat_interface_t * interface = NULL;
 double epsilon = 1e-3;
 hmat_progress_t progress;
@@ -18,6 +19,35 @@ void update_progress(hmat_progress_t * ctx) {
   Mpf_progressBar(ctx->current, ctx->max);
 }
 
+int get_vmrss(FILE * input, size_t * vmrss) {
+  /* Reads `input` line by line and stores the number found on each "VmRSS:" line in *vmrss
+   * (left untouched if there is none). Returns 0; a malformed line is reported via SETERRQ. */
+  char buffer[1024]; /* stack storage: the former malloc'd pointer was overwritten by fgets()'s NULL at EOF and leaked */
+  while(fgets(buffer, sizeof buffer, input) != NULL) {
+    if(strncmp(buffer, "VmRSS:", 6) == 0) {
+      if(sscanf(buffer, "VmRSS: %zu", vmrss) != 1) { /* zero conversions is a failure too, not only EOF */
+        SETERRQ(1, "Unable to read the value of VmRSS in /proc/self/status!");
+      }
+    }
+  }
+  return 0;
+}
+
+void * rss_loop(void * __dummy) {
+  /* Thread routine: while is_parent_running is set, re-reads /proc/self/status and raises ram_usage_peak to the largest VmRSS seen. */
+  while(is_parent_running) {
+    size_t current = 0UL; /* remains 0 when get_vmrss() stores nothing */
+    FILE * status = fopen("/proc/self/status", "r"); CHKPTRA(status);
+    int ierr = get_vmrss(status, &current); CHKERRA(ierr);
+    if(ram_usage_peak < current) {
+      ram_usage_peak = current;
+    }
+    fclose(status);
+    sleep(1); /* pause one second between samples */
+  }
+  return NULL;
+}
+
 double compression_to_epsilon(const char * compression) {
   if(strncmp(compression, "medium", 6) == 0) {
     return 1e-6;
@@ -79,8 +109,8 @@ read:
   reset_meters();
   run_one();
   ierr = fprintf(
-    output, "%d,%s,%f,%f,%e\n",
-    nbPts, compression, computation_time, ram_usage_peak,
+    output, "%d,%s,%f,%lu,%e\n",
+    nbPts, compression, computation_time, ram_usage_peak / 1024UL,
     solution_relative_error
   );
   if(ierr < 0) {
@@ -104,6 +134,11 @@ end:
 
 int run_one() {
   int ierr;
+  pthread_t child;
+
+  is_parent_running = true;
+
+  ierr = pthread_create(&child, NULL, rss_loop, NULL); CHKERRQ(ierr);
 
   printf("[minisolver] hmat initialization ... ");
   interface = calloc(1, sizeof(hmat_interface_t)); CHKPTRQ(interface);
@@ -137,6 +172,7 @@ int run_one() {
   
   double relative_error;
   ierr = testHMAT(&relative_error); CHKERRQ(ierr) ;
+  is_parent_running = false;
 
   printf("[minisolver] computation completed\n");
 
@@ -152,6 +188,8 @@ int run_one() {
   free(interface);
   printf("done\n");
 
+  pthread_join(child, NULL);
+
   return 0;
 }
 
@@ -201,6 +239,9 @@ int main(int argc, char **argv) {
   }
 
   ierr = run_one(); CHKERRQ(ierr);
+
+  printf("[minisolver] total computation time = %f\n", computation_time);
+  printf("[minisolver] memory usage peak = %lu MiB\n", ram_usage_peak / 1024UL);
   
   return 0;
 }