Commit c69e5f0a authored by Mathieu Faverge

Update the workspace sizes to call ztpmXXt kernels

parent 2f9dd95a
......@@ -19,7 +19,7 @@
* @author Mathieu Faverge
* @author Emmanuel Agullo
* @author Cedric Castagnede
* @date 2010-11-15
* @date 2018-11-09
* @precisions normal z -> s d c
*
*/
......@@ -65,10 +65,10 @@ void chameleon_pzgelqf( int genD, CHAM_desc_t *A, CHAM_desc_t *T, CHAM_desc_t *D
}
/*
* zgelqt = A->nb * (ib+1)
* zunmlq = A->nb * ib
* ztslqt = A->nb * (ib+1)
* ztsmlq = A->nb * ib
* zgelqt = A->nb * (ib+1)
* zunmlq = A->nb * ib
* ztslqt = A->nb * (ib+1)
* ztpmlqt = A->nb * ib
*/
ws_worker = A->nb * (ib+1);
......@@ -76,8 +76,8 @@ void chameleon_pzgelqf( int genD, CHAM_desc_t *A, CHAM_desc_t *T, CHAM_desc_t *D
#if defined(CHAMELEON_USE_CUDA)
/* Worker space
*
* zunmqr = A->nb * ib
* ztsmqr = 2 * A->nb * ib
* zunmlq = A->nb * ib
* ztpmlqt = 2 * A->nb * ib
*/
ws_worker = chameleon_max( ws_worker, ib * A->nb * 2 );
#endif
......
......@@ -14,7 +14,7 @@
* @version 1.0.0
* @author Mathieu Faverge
* @author Raphael Boucherie
* @date 2017-05-17
* @date 2018-11-09
* @precisions normal z -> s d c
*
*/
......@@ -69,10 +69,10 @@ void chameleon_pzgelqf_param( int genD, const libhqr_tree_t *qrtree, CHAM_desc_t
/* Allocation of temporary (scratch) working space */
#if defined(CHAMELEON_USE_CUDA)
/*
* zunmqr = A->nb * ib
* ztpmqrt = 2 * A->nb * ib
* zunmlq = A->nb * ib
* ztpmlqt = 3 * A->nb * ib
*/
ws_worker = chameleon_max( ws_worker, ib * A->nb * 2 );
ws_worker = chameleon_max( ws_worker, ib * A->nb * 3 );
#endif
ws_worker *= sizeof(CHAMELEON_Complex64_t);
......
......@@ -20,7 +20,7 @@
* @author Mathieu Faverge
* @author Emmanuel Agullo
* @author Cedric Castagnede
* @date 2010-11-15
* @date 2018-11-09
* @precisions normal z -> s d c
*
*/
......@@ -62,10 +62,10 @@ void chameleon_pzgelqfrh( int genD, int BS, CHAM_desc_t *A, CHAM_desc_t *T, CHAM
}
/*
* zgelqt = A->nb * (ib+1)
* zunmlq = A->nb * ib
* ztplqt = A->nb * (ib+1)
* ztpmlq = A->nb * ib
* zgelqt = A->nb * (ib+1)
* zunmlq = A->nb * ib
* ztplqt = A->nb * (ib+1)
* ztpmlqt = A->nb * ib
*/
ws_worker = A->nb * (ib+1);
......@@ -73,10 +73,10 @@ void chameleon_pzgelqfrh( int genD, int BS, CHAM_desc_t *A, CHAM_desc_t *T, CHAM
#if defined(CHAMELEON_USE_CUDA)
/* Worker space
*
* zunmqr = A->nb * ib
* ztpmqr = 2 * A->nb * ib
* zunmlq = A->nb * ib
* ztpmlqt = 3 * A->nb * ib
*/
ws_worker = chameleon_max( ws_worker, ib * A->nb * 2 );
ws_worker = chameleon_max( ws_worker, ib * A->nb * 3 );
#endif
ws_worker *= sizeof(CHAMELEON_Complex64_t);
......
......@@ -19,7 +19,7 @@
* @author Mathieu Faverge
* @author Emmanuel Agullo
* @author Cedric Castagnede
* @date 2010-11-15
* @date 2018-11-09
* @precisions normal z -> s d c
*
*/
......@@ -60,10 +60,10 @@ void chameleon_pzgeqrf( int genD, CHAM_desc_t *A, CHAM_desc_t *T, CHAM_desc_t *D
}
/*
* zgeqrt = A->nb * (ib+1)
* zunmqr = A->nb * ib
* ztsqrt = A->nb * (ib+1)
* ztsmqr = A->nb * ib
* zgeqrt = A->nb * (ib+1)
* zunmqr = A->nb * ib
* ztsqrt = A->nb * (ib+1)
* ztpmqrt = A->nb * ib
*/
ws_worker = A->nb * (ib+1);
......@@ -71,8 +71,8 @@ void chameleon_pzgeqrf( int genD, CHAM_desc_t *A, CHAM_desc_t *T, CHAM_desc_t *D
#if defined(CHAMELEON_USE_CUDA)
/* Worker space
*
* zunmqr = A->nb * ib
* ztsmqr = 2 * A->nb * ib
* zunmqr = A->nb * ib
* ztpmqrt = 2 * A->nb * ib
*/
ws_worker = chameleon_max( ws_worker, ib * A->nb * 2 );
#endif
......
......@@ -14,7 +14,7 @@
* @version 1.0.0
* @author Mathieu Faverge
* @author Raphael Boucherie
* @date 2017-05-17
* @date 2018-11-09
* @precisions normal z -> s d c
*
*/
......@@ -74,10 +74,10 @@ void chameleon_pzgeqrf_param( int genD, int K,
/* Allocation of temporary (scratch) working space */
#if defined(CHAMELEON_USE_CUDA)
/*
* zunmqr = A->nb * ib
* ztpmqrt = 2 * A->nb * ib
* zunmqr = A->nb * ib
* ztpmqrt = 3 * A->nb * ib
*/
ws_worker = chameleon_max( ws_worker, ib * A->nb * 2 );
ws_worker = chameleon_max( ws_worker, ib * A->nb * 3 );
#endif
ws_worker *= sizeof(CHAMELEON_Complex64_t);
......
......@@ -20,7 +20,7 @@
* @author Mathieu Faverge
* @author Emmanuel Agullo
* @author Cedric Castagnede
* @date 2010-11-15
* @date 2018-11-09
* @precisions normal z -> s d c
*
*/
......@@ -62,10 +62,10 @@ void chameleon_pzgeqrfrh( int genD, int BS, CHAM_desc_t *A, CHAM_desc_t *T, CHAM
}
/*
* zgeqrt = A->nb * (ib+1)
* zunmqr = A->nb * ib
* ztpqrt = A->nb * (ib+1)
* ztpmqr = A->nb * ib
* zgeqrt = A->nb * (ib+1)
* zunmqr = A->nb * ib
* ztpqrt = A->nb * (ib+1)
* ztpmqrt = A->nb * ib
*/
ws_worker = A->nb * (ib+1);
......@@ -73,10 +73,10 @@ void chameleon_pzgeqrfrh( int genD, int BS, CHAM_desc_t *A, CHAM_desc_t *T, CHAM
#if defined(CHAMELEON_USE_CUDA)
/* Worker space
*
* zunmqr = A->nb * ib
* ztpmqr = 2 * A->nb * ib
* zunmqr = A->nb * ib
* ztpmqrt = 3 * A->nb * ib
*/
ws_worker = chameleon_max( ws_worker, ib * A->nb * 2 );
ws_worker = chameleon_max( ws_worker, ib * A->nb * 3 );
#endif
ws_worker *= sizeof(CHAMELEON_Complex64_t);
......
......@@ -14,7 +14,7 @@
* @version 1.0.0
* @author Hatem Ltaief
* @author Azzam Haidar
* @date 2010-11-15
* @date 2018-11-09
* @precisions normal z -> s d c
*
*/
......@@ -74,9 +74,9 @@ void chameleon_pzhetrd_he2hb(cham_uplo_t uplo,
#if defined(CHAMELEON_USE_CUDA)
/* Worker space
*
* zunmqr = A->nb * ib
* zunmqr = A->nb * ib
* ztsmqr = 2 * A->nb * ib
* zherfb = A->nb * ib
* zherfb = A->nb * ib
*/
ws_worker = chameleon_max( ws_worker, ib * A->nb * 2 );
#endif
......
......@@ -14,7 +14,7 @@
*
* @version 1.0.0
* @author Mathieu Faverge
* @date 2016-12-15
* @date 2018-11-09
* @precisions normal z -> s d c
*
*/
......@@ -66,9 +66,9 @@ void chameleon_pztpgqrt( int KT, int L,
#if defined(CHAMELEON_USE_CUDA)
/* Worker space
*
* ztpmqrt = 2 * Q1->nb * ib
* ztpmqrt = 3 * Q1->nb * ib
*/
ws_worker = chameleon_max( ws_worker, ib * Q1->nb * 2 );
ws_worker = chameleon_max( ws_worker, ib * Q1->nb * 3 );
#endif
ws_worker *= sizeof(CHAMELEON_Complex64_t);
......
......@@ -14,7 +14,7 @@
*
* @version 1.0.0
* @author Mathieu Faverge
* @date 2016-12-15
* @date 2018-11-09
* @precisions normal z -> s d c
*
*/
......@@ -61,9 +61,9 @@ void chameleon_pztpqrt( int L, CHAM_desc_t *A, CHAM_desc_t *B, CHAM_desc_t *T,
/* Allocation of temporary (scratch) working space */
#if defined(CHAMELEON_USE_CUDA)
/*
* ztpmqrt = 2 * A->nb * ib
* ztpmqrt = 3 * A->nb * ib
*/
ws_worker = chameleon_max( ws_worker, ib * A->nb * 2 );
ws_worker = chameleon_max( ws_worker, ib * A->nb * 3 );
#endif
ws_worker *= sizeof(CHAMELEON_Complex64_t);
......
......@@ -19,7 +19,7 @@
* @author Mathieu Faverge
* @author Emmanuel Agullo
* @author Cedric Castagnede
* @date 2010-11-15
* @date 2018-11-09
* @precisions normal z -> s d c
*
*/
......@@ -67,8 +67,8 @@ void chameleon_pzunglq( int genD, CHAM_desc_t *A, CHAM_desc_t *Q, CHAM_desc_t *T
}
/*
* zunmlq = A->nb * ib
* ztpmlq = A->nb * ib
* zunmlq = A->nb * ib
* ztpmlqt = A->nb * ib
*/
ws_worker = A->nb * ib;
......@@ -76,8 +76,8 @@ void chameleon_pzunglq( int genD, CHAM_desc_t *A, CHAM_desc_t *Q, CHAM_desc_t *T
#if defined(CHAMELEON_USE_CUDA)
/* Worker space
*
* zunmlq = A->nb * ib
* ztpmlq = 2 * A->nb * ib
* zunmlq = A->nb * ib
* ztpmlqt = 2 * A->nb * ib
*/
ws_worker = chameleon_max( ws_worker, ib * A->nb * 2 );
#endif
......
......@@ -14,7 +14,7 @@
* @version 1.0.0
* @author Mathieu Faverge
* @author Raphael Boucherie
* @date 2017-05-17
* @date 2018-11-09
* @precisions normal z -> s d c
*
*/
......@@ -63,18 +63,18 @@ void chameleon_pzunglq_param( int genD, const libhqr_tree_t *qrtree, CHAM_desc_t
}
/*
* zunmqr = A->nb * ib
* ztpmqr = A->nb * ib
* zunmlq = A->nb * ib
* ztpmlqt = A->nb * ib
*/
ws_worker = A->nb * ib;
#if defined(CHAMELEON_USE_CUDA)
/* Worker space
*
* zunmqr = A->nb * ib
* ztpmqr = 2 * A->nb * ib
* zunmlq = A->nb * ib
* ztpmlqt = 3 * A->nb * ib
*/
ws_worker = chameleon_max( ws_worker, ib * A->nb * 2 );
ws_worker = chameleon_max( ws_worker, ib * A->nb * 3 );
#endif
ws_worker *= sizeof(CHAMELEON_Complex64_t);
......
......@@ -18,7 +18,7 @@
* @author Mathieu Faverge
* @author Emmanuel Agullo
* @author Cedric Castagnede
* @date 2011-05-24
* @date 2018-11-09
* @precisions normal z -> s d c
*
*/
......@@ -64,18 +64,18 @@ void chameleon_pzunglqrh( int genD, int BS,
}
/*
* zunmqr = A->nb * ib
* ztpmqr = A->nb * ib
* zunmlq = A->nb * ib
* ztpmlqt = A->nb * ib
*/
ws_worker = A->nb * ib;
#if defined(CHAMELEON_USE_CUDA)
/* Worker space
*
* zunmqr = A->nb * ib
* ztpmqr = 2 * A->nb * ib
* zunmlq = A->nb * ib
* ztpmlqt = 3 * A->nb * ib
*/
ws_worker = chameleon_max( ws_worker, ib * A->nb * 2 );
ws_worker = chameleon_max( ws_worker, ib * A->nb * 3 );
#endif
ws_worker *= sizeof(CHAMELEON_Complex64_t);
......
......@@ -19,7 +19,7 @@
* @author Mathieu Faverge
* @author Emmanuel Agullo
* @author Cedric Castagnede
* @date 2010-11-15
* @date 2018-11-09
* @precisions normal z -> s d c
*
*/
......@@ -68,8 +68,8 @@ void chameleon_pzungqr( int genD, CHAM_desc_t *A, CHAM_desc_t *Q,
}
/*
* zunmqr = A->nb * ib
* ztsmqr = A->nb * ib
* zunmqr = A->nb * ib
* ztpmqrt = A->nb * ib
*/
ws_worker = A->nb * ib;
......@@ -77,8 +77,8 @@ void chameleon_pzungqr( int genD, CHAM_desc_t *A, CHAM_desc_t *Q,
#if defined(CHAMELEON_USE_CUDA)
/* Worker space
*
* zunmqr = A->nb * ib
* ztsmqr = 2 * A->nb * ib
* zunmqr = A->nb * ib
* ztpmqrt = 2 * A->nb * ib
*/
ws_worker = chameleon_max( ws_worker, ib * A->nb * 2 );
#endif
......
......@@ -14,7 +14,7 @@
* @version 1.0.0
* @author Mathieu Faverge
* @author Raphael Boucherie
* @date 2017-05-17
* @date 2018-11-09
* @precisions normal z -> s d c
*
*/
......@@ -60,17 +60,17 @@ void chameleon_pzungqr_param( int genD, int K,
}
/*
* zunmqr = A->nb * ib
* ztpmqr = A->nb * ib
* zunmqr = A->nb * ib
* ztpmqrt = A->nb * ib
*/
ws_worker = A->nb * ib;
/* Allocation of temporary (scratch) working space */
#if defined(CHAMELEON_USE_CUDA)
/*
* ztpmqrt = 2 * A->nb * ib
* ztpmqrt = 3 * A->nb * ib
*/
ws_worker = chameleon_max( ws_worker, ib * A->nb * 2 );
ws_worker = chameleon_max( ws_worker, ib * A->nb * 3 );
#endif
ws_worker *= sizeof(CHAMELEON_Complex64_t);
......
......@@ -20,7 +20,7 @@
* @author Mathieu Faverge
* @author Emmanuel Agullo
* @author Cedric Castagnede
* @date 2010-11-15
* @date 2018-11-09
* @precisions normal z -> s d c
*
*/
......@@ -67,8 +67,7 @@ void chameleon_pzungqrrh( int genD, int BS,
}
/*
* zunmqr = A->nb * ib
* ztsmqr = A->nb * ib
* zunmqr = A->nb * ib
* ztpmqrt = A->nb * ib
*/
ws_worker = A->nb * ib;
......@@ -76,10 +75,10 @@ void chameleon_pzungqrrh( int genD, int BS,
#if defined(CHAMELEON_USE_CUDA)
/* Worker space
*
* zunmqr = A->nb * ib
* ztsmqr = 2 * A->nb * ib
* zunmqr = A->nb * ib
* ztpmqrt = 3 * A->nb * ib
*/
ws_worker = chameleon_max( ws_worker, ib * A->nb * 2 );
ws_worker = chameleon_max( ws_worker, ib * A->nb * 3 );
#endif
ws_worker *= sizeof(CHAMELEON_Complex64_t);
......
......@@ -20,7 +20,7 @@
* @author Mathieu Faverge
* @author Emmanuel Agullo
* @author Cedric Castagnede
* @date 2010-11-15
* @date 2018-11-09
* @precisions normal z -> s d c
*
*/
......@@ -70,16 +70,16 @@ void chameleon_pzunmlq( int genD, cham_side_t side, cham_trans_t trans,
}
/*
* zunmlq = A->mb * ib
* ztsmlq = A->mb * ib
* zunmlq = A->mb * ib
* ztpmlqt = A->mb * ib
*/
ws_worker = A->mb * ib;
#if defined(CHAMELEON_USE_CUDA)
/* Worker space
*
* zunmlq = A->mb * ib
* ztsmlq = 2 * A->mb * ib
* zunmlq = A->mb * ib
* ztpmlqt = 2 * A->mb * ib
*/
ws_worker = chameleon_max( ws_worker, ib * A->mb * 2 );
#endif
......
......@@ -14,7 +14,7 @@
* @version 1.0.0
* @author Mathieu Faverge
* @author Raphael Boucherie
* @date 2017-05-17
* @date 2018-11-09
* @precisions normal z -> s d c
*
*/
......@@ -63,19 +63,18 @@ void chameleon_pzunmlq_param( int genD, const libhqr_tree_t *qrtree,
}
/*
* zunmlq = A->nb * ib
* ztsmlq = A->nb * ib
* zttmlq = A->nb * ib
* zunmlq = A->nb * ib
* ztpmlqt = A->nb * ib
*/
ws_worker = A->nb * ib;
#if defined(CHAMELEON_USE_CUDA)
/* Worker space
*
* zunmlq = A->nb * ib
* ztsmlq = 2 * A->nb * ib
* zunmlq = A->nb * ib
* ztpmlqt = 3 * A->nb * ib
*/
ws_worker = chameleon_max( ws_worker, ib * A->nb * 2 );
ws_worker = chameleon_max( ws_worker, ib * A->nb * 3 );
#endif
ws_worker *= sizeof(CHAMELEON_Complex64_t);
......
......@@ -20,7 +20,7 @@
* @author Mathieu Faverge
* @author Emmanuel Agullo
* @author Cedric Castagnede
* @date 2010-11-15
* @date 2018-11-09
* @precisions normal z -> s d c
*
*/
......@@ -65,19 +65,18 @@ void chameleon_pzunmlqrh( int genD, int BS, cham_side_t side, cham_trans_t trans
}
/*
* zunmlq = A->nb * ib
* ztsmlq = A->nb * ib
* zttmlq = A->nb * ib
* zunmlq = A->nb * ib
* ztpmlqt = A->nb * ib
*/
ws_worker = A->nb * ib;
#if defined(CHAMELEON_USE_CUDA)
/* Worker space
*
* zunmlq = A->nb * ib
* ztsmlq = 2 * A->nb * ib
* zunmlq = A->nb * ib
* ztpmlqt = 3 * A->nb * ib
*/
ws_worker = chameleon_max( ws_worker, ib * A->nb * 2 );
ws_worker = chameleon_max( ws_worker, ib * A->nb * 3 );
#endif
ws_worker *= sizeof(CHAMELEON_Complex64_t);
......
......@@ -20,7 +20,7 @@
* @author Mathieu Faverge
* @author Emmanuel Agullo
* @author Cedric Castagnede
* @date 2010-11-15
* @date 2018-11-09
* @precisions normal z -> s d c
*
*/
......@@ -70,16 +70,16 @@ void chameleon_pzunmqr( int genD, cham_side_t side, cham_trans_t trans,
}
/*
* zunmqr = A->nb * ib
* ztsmqr = A->nb * ib
* zunmqr = A->nb * ib
* ztpmqrt = A->nb * ib
*/
ws_worker = A->nb * ib;
#if defined(CHAMELEON_USE_CUDA)
/* Worker space
*
* zunmqr = A->nb * ib
* ztsmqr = 2 * A->nb * ib
* zunmqr = A->nb * ib
* ztpmqrt = 2 * A->nb * ib
*/
ws_worker = chameleon_max( ws_worker, ib * A->nb * 2 );
#endif
......
......@@ -14,7 +14,7 @@
* @version 1.0.0
* @author Mathieu Faverge
* @author Raphael Boucherie
* @date 2017-05-17
* @date 2018-11-09
* @precisions normal z -> s d c
*
*/
......@@ -63,8 +63,7 @@ void chameleon_pzunmqr_param( int genD, const libhqr_tree_t *qrtree,
}
/*
* zunmqr = A->nb * ib
* ztsmqr = A->nb * ib
* zunmqr = A->nb * ib
* ztpmqrt = A->nb * ib
*/
ws_worker = A->nb * ib;
......@@ -72,10 +71,10 @@ void chameleon_pzunmqr_param( int genD, const libhqr_tree_t *qrtree,
#if defined(CHAMELEON_USE_CUDA)
/* Worker space
*
* zunmqr = A->nb * ib
* ztsmqr = 2 * A->nb * ib
* zunmqr = A->nb * ib
* ztpmqrt = 3 * A->nb * ib
*/
ws_worker = chameleon_max( ws_worker, ib * A->nb * 2 );
ws_worker = chameleon_max( ws_worker, ib * A->nb * 3 );
#endif
ws_worker *= sizeof(CHAMELEON_Complex64_t);
......
......@@ -20,7 +20,7 @@
* @author Mathieu Faverge
* @author Emmanuel Agullo
* @author Cedric Castagnede
* @date 2010-11-15
* @date 2018-11-09
* @precisions normal z -> s d c
*
*/
......@@ -66,8 +66,7 @@ void chameleon_pzunmqrrh( int genD, int BS, cham_side_t side, cham_trans_t trans
}
/*
* zunmqr = A->nb * ib
* ztsmqr = A->nb * ib
* zunmqr = A->nb * ib
* ztpmqrt = A->nb * ib
*/
ws_worker = A->nb * ib;
......@@ -75,10 +74,10 @@ void chameleon_pzunmqrrh( int genD, int BS, cham_side_t side, cham_trans_t trans
#if defined(CHAMELEON_USE_CUDA)
/* Worker space
*
* zunmqr = A->nb * ib
* ztsmqr = 2 * A->nb * ib
* zunmqr = A->nb * ib
* ztpmqrt = 3 * A->nb * ib
*/
ws_worker = chameleon_max( ws_worker, ib * A->nb * 2 );
ws_worker = chameleon_max( ws_worker, ib * A->nb * 3 );
#endif
ws_worker *= sizeof(CHAMELEON_Complex64_t);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment