Commit c69e5f0a authored by Mathieu Faverge

Update the workspace sizes to call ztpmXXt kernels

parent 2f9dd95a
......@@ -19,7 +19,7 @@
* @author Mathieu Faverge
* @author Emmanuel Agullo
* @author Cedric Castagnede
* @date 2010-11-15
* @date 2018-11-09
* @precisions normal z -> s d c
*
*/
......@@ -68,7 +68,7 @@ void chameleon_pzgelqf( int genD, CHAM_desc_t *A, CHAM_desc_t *T, CHAM_desc_t *D
* zgelqt = A->nb * (ib+1)
* zunmlq = A->nb * ib
* ztslqt = A->nb * (ib+1)
* ztsmlq = A->nb * ib
* ztpmlqt = A->nb * ib
*/
ws_worker = A->nb * (ib+1);
......@@ -76,8 +76,8 @@ void chameleon_pzgelqf( int genD, CHAM_desc_t *A, CHAM_desc_t *T, CHAM_desc_t *D
#if defined(CHAMELEON_USE_CUDA)
/* Worker space
*
* zunmqr = A->nb * ib
* ztsmqr = 2 * A->nb * ib
* zunmlq = A->nb * ib
* ztpmlqt = 2 * A->nb * ib
*/
ws_worker = chameleon_max( ws_worker, ib * A->nb * 2 );
#endif
......
......@@ -14,7 +14,7 @@
* @version 1.0.0
* @author Mathieu Faverge
* @author Raphael Boucherie
* @date 2017-05-17
* @date 2018-11-09
* @precisions normal z -> s d c
*
*/
......@@ -69,10 +69,10 @@ void chameleon_pzgelqf_param( int genD, const libhqr_tree_t *qrtree, CHAM_desc_t
/* Allocation of temporary (scratch) working space */
#if defined(CHAMELEON_USE_CUDA)
/*
* zunmqr = A->nb * ib
* ztpmqrt = 2 * A->nb * ib
* zunmlq = A->nb * ib
* ztpmlqt = 3 * A->nb * ib
*/
ws_worker = chameleon_max( ws_worker, ib * A->nb * 2 );
ws_worker = chameleon_max( ws_worker, ib * A->nb * 3 );
#endif
ws_worker *= sizeof(CHAMELEON_Complex64_t);
......
......@@ -20,7 +20,7 @@
* @author Mathieu Faverge
* @author Emmanuel Agullo
* @author Cedric Castagnede
* @date 2010-11-15
* @date 2018-11-09
* @precisions normal z -> s d c
*
*/
......@@ -65,7 +65,7 @@ void chameleon_pzgelqfrh( int genD, int BS, CHAM_desc_t *A, CHAM_desc_t *T, CHAM
* zgelqt = A->nb * (ib+1)
* zunmlq = A->nb * ib
* ztplqt = A->nb * (ib+1)
* ztpmlq = A->nb * ib
* ztpmlqt = A->nb * ib
*/
ws_worker = A->nb * (ib+1);
......@@ -73,10 +73,10 @@ void chameleon_pzgelqfrh( int genD, int BS, CHAM_desc_t *A, CHAM_desc_t *T, CHAM
#if defined(CHAMELEON_USE_CUDA)
/* Worker space
*
* zunmqr = A->nb * ib
* ztpmqr = 2 * A->nb * ib
* zunmlq = A->nb * ib
* ztpmlqt = 3 * A->nb * ib
*/
ws_worker = chameleon_max( ws_worker, ib * A->nb * 2 );
ws_worker = chameleon_max( ws_worker, ib * A->nb * 3 );
#endif
ws_worker *= sizeof(CHAMELEON_Complex64_t);
......
......@@ -19,7 +19,7 @@
* @author Mathieu Faverge
* @author Emmanuel Agullo
* @author Cedric Castagnede
* @date 2010-11-15
* @date 2018-11-09
* @precisions normal z -> s d c
*
*/
......@@ -63,7 +63,7 @@ void chameleon_pzgeqrf( int genD, CHAM_desc_t *A, CHAM_desc_t *T, CHAM_desc_t *D
* zgeqrt = A->nb * (ib+1)
* zunmqr = A->nb * ib
* ztsqrt = A->nb * (ib+1)
* ztsmqr = A->nb * ib
* ztpmqrt = A->nb * ib
*/
ws_worker = A->nb * (ib+1);
......@@ -72,7 +72,7 @@ void chameleon_pzgeqrf( int genD, CHAM_desc_t *A, CHAM_desc_t *T, CHAM_desc_t *D
/* Worker space
*
* zunmqr = A->nb * ib
* ztsmqr = 2 * A->nb * ib
* ztpmqrt = 2 * A->nb * ib
*/
ws_worker = chameleon_max( ws_worker, ib * A->nb * 2 );
#endif
......
......@@ -14,7 +14,7 @@
* @version 1.0.0
* @author Mathieu Faverge
* @author Raphael Boucherie
* @date 2017-05-17
* @date 2018-11-09
* @precisions normal z -> s d c
*
*/
......@@ -75,9 +75,9 @@ void chameleon_pzgeqrf_param( int genD, int K,
#if defined(CHAMELEON_USE_CUDA)
/*
* zunmqr = A->nb * ib
* ztpmqrt = 2 * A->nb * ib
* ztpmqrt = 3 * A->nb * ib
*/
ws_worker = chameleon_max( ws_worker, ib * A->nb * 2 );
ws_worker = chameleon_max( ws_worker, ib * A->nb * 3 );
#endif
ws_worker *= sizeof(CHAMELEON_Complex64_t);
......
......@@ -20,7 +20,7 @@
* @author Mathieu Faverge
* @author Emmanuel Agullo
* @author Cedric Castagnede
* @date 2010-11-15
* @date 2018-11-09
* @precisions normal z -> s d c
*
*/
......@@ -65,7 +65,7 @@ void chameleon_pzgeqrfrh( int genD, int BS, CHAM_desc_t *A, CHAM_desc_t *T, CHAM
* zgeqrt = A->nb * (ib+1)
* zunmqr = A->nb * ib
* ztpqrt = A->nb * (ib+1)
* ztpmqr = A->nb * ib
* ztpmqrt = A->nb * ib
*/
ws_worker = A->nb * (ib+1);
......@@ -74,9 +74,9 @@ void chameleon_pzgeqrfrh( int genD, int BS, CHAM_desc_t *A, CHAM_desc_t *T, CHAM
/* Worker space
*
* zunmqr = A->nb * ib
* ztpmqr = 2 * A->nb * ib
* ztpmqrt = 3 * A->nb * ib
*/
ws_worker = chameleon_max( ws_worker, ib * A->nb * 2 );
ws_worker = chameleon_max( ws_worker, ib * A->nb * 3 );
#endif
ws_worker *= sizeof(CHAMELEON_Complex64_t);
......
......@@ -14,7 +14,7 @@
* @version 1.0.0
* @author Hatem Ltaief
* @author Azzam Haidar
* @date 2010-11-15
* @date 2018-11-09
* @precisions normal z -> s d c
*
*/
......
......@@ -14,7 +14,7 @@
*
* @version 1.0.0
* @author Mathieu Faverge
* @date 2016-12-15
* @date 2018-11-09
* @precisions normal z -> s d c
*
*/
......@@ -66,9 +66,9 @@ void chameleon_pztpgqrt( int KT, int L,
#if defined(CHAMELEON_USE_CUDA)
/* Worker space
*
* ztpmqrt = 2 * Q1->nb * ib
* ztpmqrt = 3 * Q1->nb * ib
*/
ws_worker = chameleon_max( ws_worker, ib * Q1->nb * 2 );
ws_worker = chameleon_max( ws_worker, ib * Q1->nb * 3 );
#endif
ws_worker *= sizeof(CHAMELEON_Complex64_t);
......
......@@ -14,7 +14,7 @@
*
* @version 1.0.0
* @author Mathieu Faverge
* @date 2016-12-15
* @date 2018-11-09
* @precisions normal z -> s d c
*
*/
......@@ -61,9 +61,9 @@ void chameleon_pztpqrt( int L, CHAM_desc_t *A, CHAM_desc_t *B, CHAM_desc_t *T,
/* Allocation of temporary (scratch) working space */
#if defined(CHAMELEON_USE_CUDA)
/*
* ztpmqrt = 2 * A->nb * ib
* ztpmqrt = 3 * A->nb * ib
*/
ws_worker = chameleon_max( ws_worker, ib * A->nb * 2 );
ws_worker = chameleon_max( ws_worker, ib * A->nb * 3 );
#endif
ws_worker *= sizeof(CHAMELEON_Complex64_t);
......
......@@ -19,7 +19,7 @@
* @author Mathieu Faverge
* @author Emmanuel Agullo
* @author Cedric Castagnede
* @date 2010-11-15
* @date 2018-11-09
* @precisions normal z -> s d c
*
*/
......@@ -68,7 +68,7 @@ void chameleon_pzunglq( int genD, CHAM_desc_t *A, CHAM_desc_t *Q, CHAM_desc_t *T
/*
* zunmlq = A->nb * ib
* ztpmlq = A->nb * ib
* ztpmlqt = A->nb * ib
*/
ws_worker = A->nb * ib;
......@@ -77,7 +77,7 @@ void chameleon_pzunglq( int genD, CHAM_desc_t *A, CHAM_desc_t *Q, CHAM_desc_t *T
/* Worker space
*
* zunmlq = A->nb * ib
* ztpmlq = 2 * A->nb * ib
* ztpmlqt = 2 * A->nb * ib
*/
ws_worker = chameleon_max( ws_worker, ib * A->nb * 2 );
#endif
......
......@@ -14,7 +14,7 @@
* @version 1.0.0
* @author Mathieu Faverge
* @author Raphael Boucherie
* @date 2017-05-17
* @date 2018-11-09
* @precisions normal z -> s d c
*
*/
......@@ -63,18 +63,18 @@ void chameleon_pzunglq_param( int genD, const libhqr_tree_t *qrtree, CHAM_desc_t
}
/*
* zunmqr = A->nb * ib
* ztpmqr = A->nb * ib
* zunmlq = A->nb * ib
* ztpmlqt = A->nb * ib
*/
ws_worker = A->nb * ib;
#if defined(CHAMELEON_USE_CUDA)
/* Worker space
*
* zunmqr = A->nb * ib
* ztpmqr = 2 * A->nb * ib
* zunmlq = A->nb * ib
* ztpmlqt = 3 * A->nb * ib
*/
ws_worker = chameleon_max( ws_worker, ib * A->nb * 2 );
ws_worker = chameleon_max( ws_worker, ib * A->nb * 3 );
#endif
ws_worker *= sizeof(CHAMELEON_Complex64_t);
......
......@@ -18,7 +18,7 @@
* @author Mathieu Faverge
* @author Emmanuel Agullo
* @author Cedric Castagnede
* @date 2011-05-24
* @date 2018-11-09
* @precisions normal z -> s d c
*
*/
......@@ -64,18 +64,18 @@ void chameleon_pzunglqrh( int genD, int BS,
}
/*
* zunmqr = A->nb * ib
* ztpmqr = A->nb * ib
* zunmlq = A->nb * ib
* ztpmlqt = A->nb * ib
*/
ws_worker = A->nb * ib;
#if defined(CHAMELEON_USE_CUDA)
/* Worker space
*
* zunmqr = A->nb * ib
* ztpmqr = 2 * A->nb * ib
* zunmlq = A->nb * ib
* ztpmlqt = 3 * A->nb * ib
*/
ws_worker = chameleon_max( ws_worker, ib * A->nb * 2 );
ws_worker = chameleon_max( ws_worker, ib * A->nb * 3 );
#endif
ws_worker *= sizeof(CHAMELEON_Complex64_t);
......
......@@ -19,7 +19,7 @@
* @author Mathieu Faverge
* @author Emmanuel Agullo
* @author Cedric Castagnede
* @date 2010-11-15
* @date 2018-11-09
* @precisions normal z -> s d c
*
*/
......@@ -69,7 +69,7 @@ void chameleon_pzungqr( int genD, CHAM_desc_t *A, CHAM_desc_t *Q,
/*
* zunmqr = A->nb * ib
* ztsmqr = A->nb * ib
* ztpmqrt = A->nb * ib
*/
ws_worker = A->nb * ib;
......@@ -78,7 +78,7 @@ void chameleon_pzungqr( int genD, CHAM_desc_t *A, CHAM_desc_t *Q,
/* Worker space
*
* zunmqr = A->nb * ib
* ztsmqr = 2 * A->nb * ib
* ztpmqrt = 2 * A->nb * ib
*/
ws_worker = chameleon_max( ws_worker, ib * A->nb * 2 );
#endif
......
......@@ -14,7 +14,7 @@
* @version 1.0.0
* @author Mathieu Faverge
* @author Raphael Boucherie
* @date 2017-05-17
* @date 2018-11-09
* @precisions normal z -> s d c
*
*/
......@@ -61,16 +61,16 @@ void chameleon_pzungqr_param( int genD, int K,
/*
* zunmqr = A->nb * ib
* ztpmqr = A->nb * ib
* ztpmqrt = A->nb * ib
*/
ws_worker = A->nb * ib;
/* Allocation of temporary (scratch) working space */
#if defined(CHAMELEON_USE_CUDA)
/*
* ztpmqrt = 2 * A->nb * ib
* ztpmqrt = 3 * A->nb * ib
*/
ws_worker = chameleon_max( ws_worker, ib * A->nb * 2 );
ws_worker = chameleon_max( ws_worker, ib * A->nb * 3 );
#endif
ws_worker *= sizeof(CHAMELEON_Complex64_t);
......
......@@ -20,7 +20,7 @@
* @author Mathieu Faverge
* @author Emmanuel Agullo
* @author Cedric Castagnede
* @date 2010-11-15
* @date 2018-11-09
* @precisions normal z -> s d c
*
*/
......@@ -68,7 +68,6 @@ void chameleon_pzungqrrh( int genD, int BS,
/*
* zunmqr = A->nb * ib
* ztsmqr = A->nb * ib
* ztpmqrt = A->nb * ib
*/
ws_worker = A->nb * ib;
......@@ -77,9 +76,9 @@ void chameleon_pzungqrrh( int genD, int BS,
/* Worker space
*
* zunmqr = A->nb * ib
* ztsmqr = 2 * A->nb * ib
* ztpmqrt = 3 * A->nb * ib
*/
ws_worker = chameleon_max( ws_worker, ib * A->nb * 2 );
ws_worker = chameleon_max( ws_worker, ib * A->nb * 3 );
#endif
ws_worker *= sizeof(CHAMELEON_Complex64_t);
......
......@@ -20,7 +20,7 @@
* @author Mathieu Faverge
* @author Emmanuel Agullo
* @author Cedric Castagnede
* @date 2010-11-15
* @date 2018-11-09
* @precisions normal z -> s d c
*
*/
......@@ -71,7 +71,7 @@ void chameleon_pzunmlq( int genD, cham_side_t side, cham_trans_t trans,
/*
* zunmlq = A->mb * ib
* ztsmlq = A->mb * ib
* ztpmlqt = A->mb * ib
*/
ws_worker = A->mb * ib;
......@@ -79,7 +79,7 @@ void chameleon_pzunmlq( int genD, cham_side_t side, cham_trans_t trans,
/* Worker space
*
* zunmlq = A->mb * ib
* ztsmlq = 2 * A->mb * ib
* ztpmlqt = 2 * A->mb * ib
*/
ws_worker = chameleon_max( ws_worker, ib * A->mb * 2 );
#endif
......
......@@ -14,7 +14,7 @@
* @version 1.0.0
* @author Mathieu Faverge
* @author Raphael Boucherie
* @date 2017-05-17
* @date 2018-11-09
* @precisions normal z -> s d c
*
*/
......@@ -64,8 +64,7 @@ void chameleon_pzunmlq_param( int genD, const libhqr_tree_t *qrtree,
/*
* zunmlq = A->nb * ib
* ztsmlq = A->nb * ib
* zttmlq = A->nb * ib
* ztpmlqt = A->nb * ib
*/
ws_worker = A->nb * ib;
......@@ -73,9 +72,9 @@ void chameleon_pzunmlq_param( int genD, const libhqr_tree_t *qrtree,
/* Worker space
*
* zunmlq = A->nb * ib
* ztsmlq = 2 * A->nb * ib
* ztpmlqt = 3 * A->nb * ib
*/
ws_worker = chameleon_max( ws_worker, ib * A->nb * 2 );
ws_worker = chameleon_max( ws_worker, ib * A->nb * 3 );
#endif
ws_worker *= sizeof(CHAMELEON_Complex64_t);
......
......@@ -20,7 +20,7 @@
* @author Mathieu Faverge
* @author Emmanuel Agullo
* @author Cedric Castagnede
* @date 2010-11-15
* @date 2018-11-09
* @precisions normal z -> s d c
*
*/
......@@ -66,8 +66,7 @@ void chameleon_pzunmlqrh( int genD, int BS, cham_side_t side, cham_trans_t trans
/*
* zunmlq = A->nb * ib
* ztsmlq = A->nb * ib
* zttmlq = A->nb * ib
* ztpmlqt = A->nb * ib
*/
ws_worker = A->nb * ib;
......@@ -75,9 +74,9 @@ void chameleon_pzunmlqrh( int genD, int BS, cham_side_t side, cham_trans_t trans
/* Worker space
*
* zunmlq = A->nb * ib
* ztsmlq = 2 * A->nb * ib
* ztpmlqt = 3 * A->nb * ib
*/
ws_worker = chameleon_max( ws_worker, ib * A->nb * 2 );
ws_worker = chameleon_max( ws_worker, ib * A->nb * 3 );
#endif
ws_worker *= sizeof(CHAMELEON_Complex64_t);
......
......@@ -20,7 +20,7 @@
* @author Mathieu Faverge
* @author Emmanuel Agullo
* @author Cedric Castagnede
* @date 2010-11-15
* @date 2018-11-09
* @precisions normal z -> s d c
*
*/
......@@ -71,7 +71,7 @@ void chameleon_pzunmqr( int genD, cham_side_t side, cham_trans_t trans,
/*
* zunmqr = A->nb * ib
* ztsmqr = A->nb * ib
* ztpmqrt = A->nb * ib
*/
ws_worker = A->nb * ib;
......@@ -79,7 +79,7 @@ void chameleon_pzunmqr( int genD, cham_side_t side, cham_trans_t trans,
/* Worker space
*
* zunmqr = A->nb * ib
* ztsmqr = 2 * A->nb * ib
* ztpmqrt = 2 * A->nb * ib
*/
ws_worker = chameleon_max( ws_worker, ib * A->nb * 2 );
#endif
......
......@@ -14,7 +14,7 @@
* @version 1.0.0
* @author Mathieu Faverge
* @author Raphael Boucherie
* @date 2017-05-17
* @date 2018-11-09
* @precisions normal z -> s d c
*
*/
......@@ -64,7 +64,6 @@ void chameleon_pzunmqr_param( int genD, const libhqr_tree_t *qrtree,
/*
* zunmqr = A->nb * ib
* ztsmqr = A->nb * ib
* ztpmqrt = A->nb * ib
*/
ws_worker = A->nb * ib;
......@@ -73,9 +72,9 @@ void chameleon_pzunmqr_param( int genD, const libhqr_tree_t *qrtree,
/* Worker space
*
* zunmqr = A->nb * ib
* ztsmqr = 2 * A->nb * ib
* ztpmqrt = 3 * A->nb * ib
*/
ws_worker = chameleon_max( ws_worker, ib * A->nb * 2 );
ws_worker = chameleon_max( ws_worker, ib * A->nb * 3 );
#endif
ws_worker *= sizeof(CHAMELEON_Complex64_t);
......
......@@ -20,7 +20,7 @@
* @author Mathieu Faverge
* @author Emmanuel Agullo
* @author Cedric Castagnede
* @date 2010-11-15
* @date 2018-11-09
* @precisions normal z -> s d c
*
*/
......@@ -67,7 +67,6 @@ void chameleon_pzunmqrrh( int genD, int BS, cham_side_t side, cham_trans_t trans
/*
* zunmqr = A->nb * ib
* ztsmqr = A->nb * ib
* ztpmqrt = A->nb * ib
*/
ws_worker = A->nb * ib;
......@@ -76,9 +75,9 @@ void chameleon_pzunmqrrh( int genD, int BS, cham_side_t side, cham_trans_t trans
/* Worker space
*
* zunmqr = A->nb * ib
* ztsmqr = 2 * A->nb * ib
* ztpmqrt = 3 * A->nb * ib
*/
ws_worker = chameleon_max( ws_worker, ib * A->nb * 2 );
ws_worker = chameleon_max( ws_worker, ib * A->nb * 3 );
#endif
ws_worker *= sizeof(CHAMELEON_Complex64_t);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment