Commit c69e5f0a authored by Mathieu Faverge

Update the workspace sizes to call ztpmXXt kernels

parent 2f9dd95a
...@@ -19,7 +19,7 @@ ...@@ -19,7 +19,7 @@
* @author Mathieu Faverge * @author Mathieu Faverge
* @author Emmanuel Agullo * @author Emmanuel Agullo
* @author Cedric Castagnede * @author Cedric Castagnede
* @date 2010-11-15 * @date 2018-11-09
* @precisions normal z -> s d c * @precisions normal z -> s d c
* *
*/ */
...@@ -65,10 +65,10 @@ void chameleon_pzgelqf( int genD, CHAM_desc_t *A, CHAM_desc_t *T, CHAM_desc_t *D ...@@ -65,10 +65,10 @@ void chameleon_pzgelqf( int genD, CHAM_desc_t *A, CHAM_desc_t *T, CHAM_desc_t *D
} }
/* /*
* zgelqt = A->nb * (ib+1) * zgelqt = A->nb * (ib+1)
* zunmlq = A->nb * ib * zunmlq = A->nb * ib
* ztslqt = A->nb * (ib+1) * ztslqt = A->nb * (ib+1)
* ztsmlq = A->nb * ib * ztpmlqt = A->nb * ib
*/ */
ws_worker = A->nb * (ib+1); ws_worker = A->nb * (ib+1);
...@@ -76,8 +76,8 @@ void chameleon_pzgelqf( int genD, CHAM_desc_t *A, CHAM_desc_t *T, CHAM_desc_t *D ...@@ -76,8 +76,8 @@ void chameleon_pzgelqf( int genD, CHAM_desc_t *A, CHAM_desc_t *T, CHAM_desc_t *D
#if defined(CHAMELEON_USE_CUDA) #if defined(CHAMELEON_USE_CUDA)
/* Worker space /* Worker space
* *
* zunmqr = A->nb * ib * zunmlq = A->nb * ib
* ztsmqr = 2 * A->nb * ib * ztpmlqt = 2 * A->nb * ib
*/ */
ws_worker = chameleon_max( ws_worker, ib * A->nb * 2 ); ws_worker = chameleon_max( ws_worker, ib * A->nb * 2 );
#endif #endif
......
...@@ -14,7 +14,7 @@ ...@@ -14,7 +14,7 @@
* @version 1.0.0 * @version 1.0.0
* @author Mathieu Faverge * @author Mathieu Faverge
* @author Raphael Boucherie * @author Raphael Boucherie
* @date 2017-05-17 * @date 2018-11-09
* @precisions normal z -> s d c * @precisions normal z -> s d c
* *
*/ */
...@@ -69,10 +69,10 @@ void chameleon_pzgelqf_param( int genD, const libhqr_tree_t *qrtree, CHAM_desc_t ...@@ -69,10 +69,10 @@ void chameleon_pzgelqf_param( int genD, const libhqr_tree_t *qrtree, CHAM_desc_t
/* Allocation of temporary (scratch) working space */ /* Allocation of temporary (scratch) working space */
#if defined(CHAMELEON_USE_CUDA) #if defined(CHAMELEON_USE_CUDA)
/* /*
* zunmqr = A->nb * ib * zunmlq = A->nb * ib
* ztpmqrt = 2 * A->nb * ib * ztpmlqt = 3 * A->nb * ib
*/ */
ws_worker = chameleon_max( ws_worker, ib * A->nb * 2 ); ws_worker = chameleon_max( ws_worker, ib * A->nb * 3 );
#endif #endif
ws_worker *= sizeof(CHAMELEON_Complex64_t); ws_worker *= sizeof(CHAMELEON_Complex64_t);
......
...@@ -20,7 +20,7 @@ ...@@ -20,7 +20,7 @@
* @author Mathieu Faverge * @author Mathieu Faverge
* @author Emmanuel Agullo * @author Emmanuel Agullo
* @author Cedric Castagnede * @author Cedric Castagnede
* @date 2010-11-15 * @date 2018-11-09
* @precisions normal z -> s d c * @precisions normal z -> s d c
* *
*/ */
...@@ -62,10 +62,10 @@ void chameleon_pzgelqfrh( int genD, int BS, CHAM_desc_t *A, CHAM_desc_t *T, CHAM ...@@ -62,10 +62,10 @@ void chameleon_pzgelqfrh( int genD, int BS, CHAM_desc_t *A, CHAM_desc_t *T, CHAM
} }
/* /*
* zgelqt = A->nb * (ib+1) * zgelqt = A->nb * (ib+1)
* zunmlq = A->nb * ib * zunmlq = A->nb * ib
* ztplqt = A->nb * (ib+1) * ztplqt = A->nb * (ib+1)
* ztpmlq = A->nb * ib * ztpmlqt = A->nb * ib
*/ */
ws_worker = A->nb * (ib+1); ws_worker = A->nb * (ib+1);
...@@ -73,10 +73,10 @@ void chameleon_pzgelqfrh( int genD, int BS, CHAM_desc_t *A, CHAM_desc_t *T, CHAM ...@@ -73,10 +73,10 @@ void chameleon_pzgelqfrh( int genD, int BS, CHAM_desc_t *A, CHAM_desc_t *T, CHAM
#if defined(CHAMELEON_USE_CUDA) #if defined(CHAMELEON_USE_CUDA)
/* Worker space /* Worker space
* *
* zunmqr = A->nb * ib * zunmlq = A->nb * ib
* ztpmqr = 2 * A->nb * ib * ztpmlqt = 3 * A->nb * ib
*/ */
ws_worker = chameleon_max( ws_worker, ib * A->nb * 2 ); ws_worker = chameleon_max( ws_worker, ib * A->nb * 3 );
#endif #endif
ws_worker *= sizeof(CHAMELEON_Complex64_t); ws_worker *= sizeof(CHAMELEON_Complex64_t);
......
...@@ -19,7 +19,7 @@ ...@@ -19,7 +19,7 @@
* @author Mathieu Faverge * @author Mathieu Faverge
* @author Emmanuel Agullo * @author Emmanuel Agullo
* @author Cedric Castagnede * @author Cedric Castagnede
* @date 2010-11-15 * @date 2018-11-09
* @precisions normal z -> s d c * @precisions normal z -> s d c
* *
*/ */
...@@ -60,10 +60,10 @@ void chameleon_pzgeqrf( int genD, CHAM_desc_t *A, CHAM_desc_t *T, CHAM_desc_t *D ...@@ -60,10 +60,10 @@ void chameleon_pzgeqrf( int genD, CHAM_desc_t *A, CHAM_desc_t *T, CHAM_desc_t *D
} }
/* /*
* zgeqrt = A->nb * (ib+1) * zgeqrt = A->nb * (ib+1)
* zunmqr = A->nb * ib * zunmqr = A->nb * ib
* ztsqrt = A->nb * (ib+1) * ztsqrt = A->nb * (ib+1)
* ztsmqr = A->nb * ib * ztpmqrt = A->nb * ib
*/ */
ws_worker = A->nb * (ib+1); ws_worker = A->nb * (ib+1);
...@@ -71,8 +71,8 @@ void chameleon_pzgeqrf( int genD, CHAM_desc_t *A, CHAM_desc_t *T, CHAM_desc_t *D ...@@ -71,8 +71,8 @@ void chameleon_pzgeqrf( int genD, CHAM_desc_t *A, CHAM_desc_t *T, CHAM_desc_t *D
#if defined(CHAMELEON_USE_CUDA) #if defined(CHAMELEON_USE_CUDA)
/* Worker space /* Worker space
* *
* zunmqr = A->nb * ib * zunmqr = A->nb * ib
* ztsmqr = 2 * A->nb * ib * ztpmqrt = 2 * A->nb * ib
*/ */
ws_worker = chameleon_max( ws_worker, ib * A->nb * 2 ); ws_worker = chameleon_max( ws_worker, ib * A->nb * 2 );
#endif #endif
......
...@@ -14,7 +14,7 @@ ...@@ -14,7 +14,7 @@
* @version 1.0.0 * @version 1.0.0
* @author Mathieu Faverge * @author Mathieu Faverge
* @author Raphael Boucherie * @author Raphael Boucherie
* @date 2017-05-17 * @date 2018-11-09
* @precisions normal z -> s d c * @precisions normal z -> s d c
* *
*/ */
...@@ -74,10 +74,10 @@ void chameleon_pzgeqrf_param( int genD, int K, ...@@ -74,10 +74,10 @@ void chameleon_pzgeqrf_param( int genD, int K,
/* Allocation of temporary (scratch) working space */ /* Allocation of temporary (scratch) working space */
#if defined(CHAMELEON_USE_CUDA) #if defined(CHAMELEON_USE_CUDA)
/* /*
* zunmqr = A->nb * ib * zunmqr = A->nb * ib
* ztpmqrt = 2 * A->nb * ib * ztpmqrt = 3 * A->nb * ib
*/ */
ws_worker = chameleon_max( ws_worker, ib * A->nb * 2 ); ws_worker = chameleon_max( ws_worker, ib * A->nb * 3 );
#endif #endif
ws_worker *= sizeof(CHAMELEON_Complex64_t); ws_worker *= sizeof(CHAMELEON_Complex64_t);
......
...@@ -20,7 +20,7 @@ ...@@ -20,7 +20,7 @@
* @author Mathieu Faverge * @author Mathieu Faverge
* @author Emmanuel Agullo * @author Emmanuel Agullo
* @author Cedric Castagnede * @author Cedric Castagnede
* @date 2010-11-15 * @date 2018-11-09
* @precisions normal z -> s d c * @precisions normal z -> s d c
* *
*/ */
...@@ -62,10 +62,10 @@ void chameleon_pzgeqrfrh( int genD, int BS, CHAM_desc_t *A, CHAM_desc_t *T, CHAM ...@@ -62,10 +62,10 @@ void chameleon_pzgeqrfrh( int genD, int BS, CHAM_desc_t *A, CHAM_desc_t *T, CHAM
} }
/* /*
* zgeqrt = A->nb * (ib+1) * zgeqrt = A->nb * (ib+1)
* zunmqr = A->nb * ib * zunmqr = A->nb * ib
* ztpqrt = A->nb * (ib+1) * ztpqrt = A->nb * (ib+1)
* ztpmqr = A->nb * ib * ztpmqrt = A->nb * ib
*/ */
ws_worker = A->nb * (ib+1); ws_worker = A->nb * (ib+1);
...@@ -73,10 +73,10 @@ void chameleon_pzgeqrfrh( int genD, int BS, CHAM_desc_t *A, CHAM_desc_t *T, CHAM ...@@ -73,10 +73,10 @@ void chameleon_pzgeqrfrh( int genD, int BS, CHAM_desc_t *A, CHAM_desc_t *T, CHAM
#if defined(CHAMELEON_USE_CUDA) #if defined(CHAMELEON_USE_CUDA)
/* Worker space /* Worker space
* *
* zunmqr = A->nb * ib * zunmqr = A->nb * ib
* ztpmqr = 2 * A->nb * ib * ztpmqrt = 3 * A->nb * ib
*/ */
ws_worker = chameleon_max( ws_worker, ib * A->nb * 2 ); ws_worker = chameleon_max( ws_worker, ib * A->nb * 3 );
#endif #endif
ws_worker *= sizeof(CHAMELEON_Complex64_t); ws_worker *= sizeof(CHAMELEON_Complex64_t);
......
...@@ -14,7 +14,7 @@ ...@@ -14,7 +14,7 @@
* @version 1.0.0 * @version 1.0.0
* @author Hatem Ltaief * @author Hatem Ltaief
* @author Azzam Haidar * @author Azzam Haidar
* @date 2010-11-15 * @date 2018-11-09
* @precisions normal z -> s d c * @precisions normal z -> s d c
* *
*/ */
...@@ -74,9 +74,9 @@ void chameleon_pzhetrd_he2hb(cham_uplo_t uplo, ...@@ -74,9 +74,9 @@ void chameleon_pzhetrd_he2hb(cham_uplo_t uplo,
#if defined(CHAMELEON_USE_CUDA) #if defined(CHAMELEON_USE_CUDA)
/* Worker space /* Worker space
* *
* zunmqr = A->nb * ib * zunmqr = A->nb * ib
* ztsmqr = 2 * A->nb * ib * ztsmqr = 2 * A->nb * ib
* zherfb = A->nb * ib * zherfb = A->nb * ib
*/ */
ws_worker = chameleon_max( ws_worker, ib * A->nb * 2 ); ws_worker = chameleon_max( ws_worker, ib * A->nb * 2 );
#endif #endif
......
...@@ -14,7 +14,7 @@ ...@@ -14,7 +14,7 @@
* *
* @version 1.0.0 * @version 1.0.0
* @author Mathieu Faverge * @author Mathieu Faverge
* @date 2016-12-15 * @date 2018-11-09
* @precisions normal z -> s d c * @precisions normal z -> s d c
* *
*/ */
...@@ -66,9 +66,9 @@ void chameleon_pztpgqrt( int KT, int L, ...@@ -66,9 +66,9 @@ void chameleon_pztpgqrt( int KT, int L,
#if defined(CHAMELEON_USE_CUDA) #if defined(CHAMELEON_USE_CUDA)
/* Worker space /* Worker space
* *
* ztpmqrt = 2 * Q1->nb * ib * ztpmqrt = 3 * Q1->nb * ib
*/ */
ws_worker = chameleon_max( ws_worker, ib * Q1->nb * 2 ); ws_worker = chameleon_max( ws_worker, ib * Q1->nb * 3 );
#endif #endif
ws_worker *= sizeof(CHAMELEON_Complex64_t); ws_worker *= sizeof(CHAMELEON_Complex64_t);
......
...@@ -14,7 +14,7 @@ ...@@ -14,7 +14,7 @@
* *
* @version 1.0.0 * @version 1.0.0
* @author Mathieu Faverge * @author Mathieu Faverge
* @date 2016-12-15 * @date 2018-11-09
* @precisions normal z -> s d c * @precisions normal z -> s d c
* *
*/ */
...@@ -61,9 +61,9 @@ void chameleon_pztpqrt( int L, CHAM_desc_t *A, CHAM_desc_t *B, CHAM_desc_t *T, ...@@ -61,9 +61,9 @@ void chameleon_pztpqrt( int L, CHAM_desc_t *A, CHAM_desc_t *B, CHAM_desc_t *T,
/* Allocation of temporary (scratch) working space */ /* Allocation of temporary (scratch) working space */
#if defined(CHAMELEON_USE_CUDA) #if defined(CHAMELEON_USE_CUDA)
/* /*
* ztpmqrt = 2 * A->nb * ib * ztpmqrt = 3 * A->nb * ib
*/ */
ws_worker = chameleon_max( ws_worker, ib * A->nb * 2 ); ws_worker = chameleon_max( ws_worker, ib * A->nb * 3 );
#endif #endif
ws_worker *= sizeof(CHAMELEON_Complex64_t); ws_worker *= sizeof(CHAMELEON_Complex64_t);
......
...@@ -19,7 +19,7 @@ ...@@ -19,7 +19,7 @@
* @author Mathieu Faverge * @author Mathieu Faverge
* @author Emmanuel Agullo * @author Emmanuel Agullo
* @author Cedric Castagnede * @author Cedric Castagnede
* @date 2010-11-15 * @date 2018-11-09
* @precisions normal z -> s d c * @precisions normal z -> s d c
* *
*/ */
...@@ -67,8 +67,8 @@ void chameleon_pzunglq( int genD, CHAM_desc_t *A, CHAM_desc_t *Q, CHAM_desc_t *T ...@@ -67,8 +67,8 @@ void chameleon_pzunglq( int genD, CHAM_desc_t *A, CHAM_desc_t *Q, CHAM_desc_t *T
} }
/* /*
* zunmlq = A->nb * ib * zunmlq = A->nb * ib
* ztpmlq = A->nb * ib * ztpmlqt = A->nb * ib
*/ */
ws_worker = A->nb * ib; ws_worker = A->nb * ib;
...@@ -76,8 +76,8 @@ void chameleon_pzunglq( int genD, CHAM_desc_t *A, CHAM_desc_t *Q, CHAM_desc_t *T ...@@ -76,8 +76,8 @@ void chameleon_pzunglq( int genD, CHAM_desc_t *A, CHAM_desc_t *Q, CHAM_desc_t *T
#if defined(CHAMELEON_USE_CUDA) #if defined(CHAMELEON_USE_CUDA)
/* Worker space /* Worker space
* *
* zunmlq = A->nb * ib * zunmlq = A->nb * ib
* ztpmlq = 2 * A->nb * ib * ztpmlqt = 2 * A->nb * ib
*/ */
ws_worker = chameleon_max( ws_worker, ib * A->nb * 2 ); ws_worker = chameleon_max( ws_worker, ib * A->nb * 2 );
#endif #endif
......
...@@ -14,7 +14,7 @@ ...@@ -14,7 +14,7 @@
* @version 1.0.0 * @version 1.0.0
* @author Mathieu Faverge * @author Mathieu Faverge
* @author Raphael Boucherie * @author Raphael Boucherie
* @date 2017-05-17 * @date 2018-11-09
* @precisions normal z -> s d c * @precisions normal z -> s d c
* *
*/ */
...@@ -63,18 +63,18 @@ void chameleon_pzunglq_param( int genD, const libhqr_tree_t *qrtree, CHAM_desc_t ...@@ -63,18 +63,18 @@ void chameleon_pzunglq_param( int genD, const libhqr_tree_t *qrtree, CHAM_desc_t
} }
/* /*
* zunmqr = A->nb * ib * zunmlq = A->nb * ib
* ztpmqr = A->nb * ib * ztpmlqt = A->nb * ib
*/ */
ws_worker = A->nb * ib; ws_worker = A->nb * ib;
#if defined(CHAMELEON_USE_CUDA) #if defined(CHAMELEON_USE_CUDA)
/* Worker space /* Worker space
* *
* zunmqr = A->nb * ib * zunmlq = A->nb * ib
* ztpmqr = 2 * A->nb * ib * ztpmlqt = 3 * A->nb * ib
*/ */
ws_worker = chameleon_max( ws_worker, ib * A->nb * 2 ); ws_worker = chameleon_max( ws_worker, ib * A->nb * 3 );
#endif #endif
ws_worker *= sizeof(CHAMELEON_Complex64_t); ws_worker *= sizeof(CHAMELEON_Complex64_t);
......
...@@ -18,7 +18,7 @@ ...@@ -18,7 +18,7 @@
* @author Mathieu Faverge * @author Mathieu Faverge
* @author Emmanuel Agullo * @author Emmanuel Agullo
* @author Cedric Castagnede * @author Cedric Castagnede
* @date 2011-05-24 * @date 2018-11-09
* @precisions normal z -> s d c * @precisions normal z -> s d c
* *
*/ */
...@@ -64,18 +64,18 @@ void chameleon_pzunglqrh( int genD, int BS, ...@@ -64,18 +64,18 @@ void chameleon_pzunglqrh( int genD, int BS,
} }
/* /*
* zunmqr = A->nb * ib * zunmlq = A->nb * ib
* ztpmqr = A->nb * ib * ztpmlqt = A->nb * ib
*/ */
ws_worker = A->nb * ib; ws_worker = A->nb * ib;
#if defined(CHAMELEON_USE_CUDA) #if defined(CHAMELEON_USE_CUDA)
/* Worker space /* Worker space
* *
* zunmqr = A->nb * ib * zunmlq = A->nb * ib
* ztpmqr = 2 * A->nb * ib * ztpmlqt = 3 * A->nb * ib
*/ */
ws_worker = chameleon_max( ws_worker, ib * A->nb * 2 ); ws_worker = chameleon_max( ws_worker, ib * A->nb * 3 );
#endif #endif
ws_worker *= sizeof(CHAMELEON_Complex64_t); ws_worker *= sizeof(CHAMELEON_Complex64_t);
......
...@@ -19,7 +19,7 @@ ...@@ -19,7 +19,7 @@
* @author Mathieu Faverge * @author Mathieu Faverge
* @author Emmanuel Agullo * @author Emmanuel Agullo
* @author Cedric Castagnede * @author Cedric Castagnede
* @date 2010-11-15 * @date 2018-11-09
* @precisions normal z -> s d c * @precisions normal z -> s d c
* *
*/ */
...@@ -68,8 +68,8 @@ void chameleon_pzungqr( int genD, CHAM_desc_t *A, CHAM_desc_t *Q, ...@@ -68,8 +68,8 @@ void chameleon_pzungqr( int genD, CHAM_desc_t *A, CHAM_desc_t *Q,
} }
/* /*
* zunmqr = A->nb * ib * zunmqr = A->nb * ib
* ztsmqr = A->nb * ib * ztpmqrt = A->nb * ib
*/ */
ws_worker = A->nb * ib; ws_worker = A->nb * ib;
...@@ -77,8 +77,8 @@ void chameleon_pzungqr( int genD, CHAM_desc_t *A, CHAM_desc_t *Q, ...@@ -77,8 +77,8 @@ void chameleon_pzungqr( int genD, CHAM_desc_t *A, CHAM_desc_t *Q,
#if defined(CHAMELEON_USE_CUDA) #if defined(CHAMELEON_USE_CUDA)
/* Worker space /* Worker space
* *
* zunmqr = A->nb * ib * zunmqr = A->nb * ib
* ztsmqr = 2 * A->nb * ib * ztpmqrt = 2 * A->nb * ib
*/ */
ws_worker = chameleon_max( ws_worker, ib * A->nb * 2 ); ws_worker = chameleon_max( ws_worker, ib * A->nb * 2 );
#endif #endif
......
...@@ -14,7 +14,7 @@ ...@@ -14,7 +14,7 @@
* @version 1.0.0 * @version 1.0.0
* @author Mathieu Faverge * @author Mathieu Faverge
* @author Raphael Boucherie * @author Raphael Boucherie
* @date 2017-05-17 * @date 2018-11-09
* @precisions normal z -> s d c * @precisions normal z -> s d c
* *
*/ */
...@@ -60,17 +60,17 @@ void chameleon_pzungqr_param( int genD, int K, ...@@ -60,17 +60,17 @@ void chameleon_pzungqr_param( int genD, int K,
} }
/* /*
* zunmqr = A->nb * ib * zunmqr = A->nb * ib
* ztpmqr = A->nb * ib * ztpmqrt = A->nb * ib
*/ */
ws_worker = A->nb * ib; ws_worker = A->nb * ib;
/* Allocation of temporary (scratch) working space */ /* Allocation of temporary (scratch) working space */
#if defined(CHAMELEON_USE_CUDA) #if defined(CHAMELEON_USE_CUDA)
/* /*
* ztpmqrt = 2 * A->nb * ib * ztpmqrt = 3 * A->nb * ib
*/ */
ws_worker = chameleon_max( ws_worker, ib * A->nb * 2 ); ws_worker = chameleon_max( ws_worker, ib * A->nb * 3 );
#endif #endif
ws_worker *= sizeof(CHAMELEON_Complex64_t); ws_worker *= sizeof(CHAMELEON_Complex64_t);
......
...@@ -20,7 +20,7 @@ ...@@ -20,7 +20,7 @@
* @author Mathieu Faverge * @author Mathieu Faverge
* @author Emmanuel Agullo * @author Emmanuel Agullo
* @author Cedric Castagnede * @author Cedric Castagnede
* @date 2010-11-15 * @date 2018-11-09
* @precisions normal z -> s d c * @precisions normal z -> s d c
* *
*/ */
...@@ -67,8 +67,7 @@ void chameleon_pzungqrrh( int genD, int BS, ...@@ -67,8 +67,7 @@ void chameleon_pzungqrrh( int genD, int BS,
} }
/* /*
* zunmqr = A->nb * ib * zunmqr = A->nb * ib
* ztsmqr = A->nb * ib
* ztpmqrt = A->nb * ib * ztpmqrt = A->nb * ib
*/ */
ws_worker = A->nb * ib; ws_worker = A->nb * ib;
...@@ -76,10 +75,10 @@ void chameleon_pzungqrrh( int genD, int BS, ...@@ -76,10 +75,10 @@ void chameleon_pzungqrrh( int genD, int BS,
#if defined(CHAMELEON_USE_CUDA) #if defined(CHAMELEON_USE_CUDA)
/* Worker space /* Worker space
* *
* zunmqr = A->nb * ib * zunmqr = A->nb * ib
* ztsmqr = 2 * A->nb * ib * ztpmqrt = 3 * A->nb * ib
*/ */
ws_worker = chameleon_max( ws_worker, ib * A->nb * 2 ); ws_worker = chameleon_max( ws_worker, ib * A->nb * 3 );
#endif #endif
ws_worker *= sizeof(CHAMELEON_Complex64_t); ws_worker *= sizeof(CHAMELEON_Complex64_t);
......
...@@ -20,7 +20,7 @@ ...@@ -20,7 +20,7 @@
* @author Mathieu Faverge * @author Mathieu Faverge
* @author Emmanuel Agullo * @author Emmanuel Agullo
* @author Cedric Castagnede * @author Cedric Castagnede
* @date 2010-11-15 * @date 2018-11-09
* @precisions normal z -> s d c * @precisions normal z -> s d c
* *
*/ */
...@@ -70,16 +70,16 @@ void chameleon_pzunmlq( int genD, cham_side_t side, cham_trans_t trans, ...@@ -70,16 +70,16 @@ void chameleon_pzunmlq( int genD, cham_side_t side, cham_trans_t trans,
} }
/* /*
* zunmlq = A->mb * ib * zunmlq = A->mb * ib
* ztsmlq = A->mb * ib * ztpmlqt = A->mb * ib
*/ */
ws_worker = A->mb * ib; ws_worker = A->mb * ib;
#if defined(CHAMELEON_USE_CUDA) #if defined(CHAMELEON_USE_CUDA)
/* Worker space /* Worker space
* *
* zunmlq = A->mb * ib * zunmlq = A->mb * ib
* ztsmlq = 2 * A->mb * ib * ztpmlqt = 2 * A->mb * ib
*/ */
ws_worker = chameleon_max( ws_worker, ib * A->mb * 2 ); ws_worker = chameleon_max( ws_worker, ib * A->mb * 2 );
#endif #endif
......
...@@ -14,7 +14,7 @@ ...@@ -14,7 +14,7 @@
* @version 1.0.0 * @version 1.0.0
* @author Mathieu Faverge * @author Mathieu Faverge
* @author Raphael Boucherie * @author Raphael Boucherie
* @date 2017-05-17 * @date 2018-11-09
* @precisions normal z -> s d c * @precisions normal z -> s d c
* *
*/ */
...@@ -63,19 +63,18 @@ void chameleon_pzunmlq_param( int genD, const libhqr_tree_t *qrtree, ...@@ -63,19 +63,18 @@ void chameleon_pzunmlq_param( int genD, const libhqr_tree_t *qrtree,
} }
/* /*
* zunmlq = A->nb * ib * zunmlq = A->nb * ib
* ztsmlq = A->nb * ib * ztpmlqt = A->nb * ib
* zttmlq = A->nb * ib
*/ */
ws_worker = A->nb * ib; ws_worker = A->nb * ib;
#if defined(CHAMELEON_USE_CUDA) #if defined(CHAMELEON_USE_CUDA)
/* Worker space /* Worker space
* *
* zunmlq = A->nb * ib * zunmlq = A->nb * ib
* ztsmlq = 2 * A->nb * ib * ztpmlqt = 3 * A->nb * ib
*/ */
ws_worker = chameleon_max( ws_worker, ib * A->nb * 2 ); ws_worker = chameleon_max( ws_worker, ib * A->nb * 3 );
#endif #endif
ws_worker *= sizeof(CHAMELEON_Complex64_t); ws_worker *= sizeof(CHAMELEON_Complex64_t);
......
...@@ -20,7 +20,7 @@ ...@@ -20,7 +20,7 @@
* @author Mathieu Faverge * @author Mathieu Faverge
* @author Emmanuel Agullo * @author Emmanuel Agullo
* @author Cedric Castagnede * @author Cedric Castagnede
* @date 2010-11-15 * @date 2018-11-09
* @precisions normal z -> s d c * @precisions normal z -> s d c
* *
*/ */
...@@ -65,19 +65,18 @@ void chameleon_pzunmlqrh( int genD, int BS, cham_side_t side, cham_trans_t trans ...@@ -65,19 +65,18 @@ void chameleon_pzunmlqrh( int genD, int BS, cham_side_t side, cham_trans_t trans
} }
/* /*
* zunmlq = A->nb * ib * zunmlq = A->nb * ib
* ztsmlq = A->nb * ib * ztpmlqt = A->nb * ib
* zttmlq = A->nb * ib
*/ */
ws_worker = A->nb * ib; ws_worker = A->nb * ib;
#if defined(CHAMELEON_USE_CUDA) #if defined(CHAMELEON_USE_CUDA)
/* Worker space /* Worker space
* *
* zunmlq = A->nb * ib * zunmlq = A->nb * ib
* ztsmlq = 2 * A->nb * ib * ztpmlqt = 3 * A->nb * ib
*/ */
ws_worker = chameleon_max( ws_worker, ib * A->nb * 2 ); ws_worker = chameleon_max( ws_worker, ib * A->nb * 3 );
#endif #endif
ws_worker *= sizeof(CHAMELEON_Complex64_t); ws_worker *= sizeof(CHAMELEON_Complex64_t);
......
...@@ -20,7 +20,7 @@ ...@@ -20,7 +20,7 @@
* @author Mathieu Faverge * @author Mathieu Faverge
* @author Emmanuel Agullo * @author Emmanuel Agullo
* @author Cedric Castagnede * @author Cedric Castagnede