From 1fdc9fe3ad6c498000df7293438009105fcf34b0 Mon Sep 17 00:00:00 2001 From: Terry Cojean <terry.cojean@inria.fr> Date: Wed, 16 Dec 2015 14:39:49 +0000 Subject: [PATCH] Explain the restriction proposition for getrf_nopiv --- timing/time_zgetrf_nopiv_tile.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/timing/time_zgetrf_nopiv_tile.c b/timing/time_zgetrf_nopiv_tile.c index b074afc58..304d9d333 100644 --- a/timing/time_zgetrf_nopiv_tile.c +++ b/timing/time_zgetrf_nopiv_tile.c @@ -47,6 +47,12 @@ RunTest(int *iparam, double *dparam, morse_time_t *t_) MORSE_zlacpy_Tile(MorseUpperLower, descA, descAC); } + /** + * Consider this optimization on some heterogeneous platforms and matrix sizes. + * Often, the TRSM kernel on GPU achieves significantly lower performance than GEMM, + * while their performance is similar on CPU. For this algorithm it is therefore + * recommended to execute all TRSMs (relatively few tasks) on CPU to increase GPU efficiency. + */ //RUNTIME_zlocality_onerestrict( MORSE_TRSM, STARPU_CPU ); START_TIMING(); -- GitLab