Commit aa785c17 authored by Mathieu Faverge's avatar Mathieu Faverge
Browse files

Fix A initialization to avoid allocating the full A if not necessary (For now,...

Fix the initialization of A to avoid allocating the full matrix when it is not necessary (for now, the full matrix is still required for checking purposes)
parent 1c5fe0c8
...@@ -42,7 +42,7 @@ RunTest(int *iparam, double *dparam, chameleon_time_t *t_) ...@@ -42,7 +42,7 @@ RunTest(int *iparam, double *dparam, chameleon_time_t *t_)
PASTE_CODE_ALLOCATE_MATRIX( Acpy, check, CHAMELEON_Complex64_t, LDA, N ); PASTE_CODE_ALLOCATE_MATRIX( Acpy, check, CHAMELEON_Complex64_t, LDA, N );
PASTE_CODE_ALLOCATE_MATRIX( b, check, CHAMELEON_Complex64_t, LDB, NRHS); PASTE_CODE_ALLOCATE_MATRIX( b, check, CHAMELEON_Complex64_t, LDB, NRHS);
/* Initialiaze Data */ /* Initialize Data */
CHAMELEON_zplrnt( M, N, A, LDA, 453 ); CHAMELEON_zplrnt( M, N, A, LDA, 453 );
CHAMELEON_zplrnt( M, NRHS, x, LDB, 5673 ); CHAMELEON_zplrnt( M, NRHS, x, LDB, 5673 );
......
...@@ -40,7 +40,7 @@ RunTest(int *iparam, double *dparam, chameleon_time_t *t_) ...@@ -40,7 +40,7 @@ RunTest(int *iparam, double *dparam, chameleon_time_t *t_)
PASTE_CODE_ALLOCATE_MATRIX_TILE( descB, 1, CHAMELEON_Complex64_t, ChamComplexDouble, LDB, K, N ); PASTE_CODE_ALLOCATE_MATRIX_TILE( descB, 1, CHAMELEON_Complex64_t, ChamComplexDouble, LDB, K, N );
PASTE_CODE_ALLOCATE_MATRIX_TILE( descC, 1, CHAMELEON_Complex64_t, ChamComplexDouble, LDC, M, N ); PASTE_CODE_ALLOCATE_MATRIX_TILE( descC, 1, CHAMELEON_Complex64_t, ChamComplexDouble, LDC, M, N );
/* Initialiaze Data */ /* Initialize Data */
CHAMELEON_zplrnt_Tile( descA, 5373 ); CHAMELEON_zplrnt_Tile( descA, 5373 );
CHAMELEON_zplrnt_Tile( descB, 7672 ); CHAMELEON_zplrnt_Tile( descB, 7672 );
CHAMELEON_zplrnt_Tile( descC, 6387 ); CHAMELEON_zplrnt_Tile( descC, 6387 );
......
...@@ -41,7 +41,7 @@ RunTest(int *iparam, double *dparam, chameleon_time_t *t_) ...@@ -41,7 +41,7 @@ RunTest(int *iparam, double *dparam, chameleon_time_t *t_)
PASTE_CODE_ALLOCATE_MATRIX( A, 1, CHAMELEON_Complex64_t, LDA, N ); PASTE_CODE_ALLOCATE_MATRIX( A, 1, CHAMELEON_Complex64_t, LDA, N );
PASTE_CODE_ALLOCATE_MATRIX( X, 1, CHAMELEON_Complex64_t, LDB, NRHS ); PASTE_CODE_ALLOCATE_MATRIX( X, 1, CHAMELEON_Complex64_t, LDB, NRHS );
/* Initialiaze Data */ /* Initialize Data */
CHAMELEON_zplrnt( N, N, A, LDA, 51 ); CHAMELEON_zplrnt( N, N, A, LDA, 51 );
CHAMELEON_zplrnt( N, NRHS, X, LDB, 5673 ); CHAMELEON_zplrnt( N, NRHS, X, LDB, 5673 );
......
...@@ -39,7 +39,7 @@ RunTest(int *iparam, double *dparam, chameleon_time_t *t_) ...@@ -39,7 +39,7 @@ RunTest(int *iparam, double *dparam, chameleon_time_t *t_)
PASTE_CODE_ALLOCATE_MATRIX( A, 1, CHAMELEON_Complex64_t, LDA, N ); PASTE_CODE_ALLOCATE_MATRIX( A, 1, CHAMELEON_Complex64_t, LDA, N );
PASTE_CODE_ALLOCATE_MATRIX( X, 1, CHAMELEON_Complex64_t, LDB, NRHS ); PASTE_CODE_ALLOCATE_MATRIX( X, 1, CHAMELEON_Complex64_t, LDB, NRHS );
/* Initialiaze Data */ /* Initialize Data */
CHAMELEON_zplrnt( N, N, A, LDA, 51 ); CHAMELEON_zplrnt( N, N, A, LDA, 51 );
CHAMELEON_zplrnt( N, NRHS, X, LDB, 5673 ); CHAMELEON_zplrnt( N, NRHS, X, LDB, 5673 );
......
...@@ -39,7 +39,7 @@ RunTest(int *iparam, double *dparam, chameleon_time_t *t_) ...@@ -39,7 +39,7 @@ RunTest(int *iparam, double *dparam, chameleon_time_t *t_)
PASTE_CODE_ALLOCATE_MATRIX( U, (jobu == ChamVec), CHAMELEON_Complex64_t, M, M ); PASTE_CODE_ALLOCATE_MATRIX( U, (jobu == ChamVec), CHAMELEON_Complex64_t, M, M );
PASTE_CODE_ALLOCATE_MATRIX( S, 1, double, N, 1 ); PASTE_CODE_ALLOCATE_MATRIX( S, 1, double, N, 1 );
/* Initialiaze Data */ /* Initialize Data */
CHAMELEON_zplrnt_Tile(descA, 51 ); CHAMELEON_zplrnt_Tile(descA, 51 );
/* Allocate Workspace */ /* Allocate Workspace */
......
...@@ -41,7 +41,7 @@ RunTest(int *iparam, double *dparam, chameleon_time_t *t_) ...@@ -41,7 +41,7 @@ RunTest(int *iparam, double *dparam, chameleon_time_t *t_)
PASTE_CODE_ALLOCATE_MATRIX( S, 1, double, N, 1 ); PASTE_CODE_ALLOCATE_MATRIX( S, 1, double, N, 1 );
/* Allocate Workspace */ /* Allocate Workspace */
CHAMELEON_zplghe( (double)N, ChamUpperLower, N, A, LDA, 51 ); CHAMELEON_zplghe( (double)N, uplo, N, A, LDA, 51 );
CHAMELEON_Alloc_Workspace_zheevd(N, N, &descT, 1, 1); CHAMELEON_Alloc_Workspace_zheevd(N, N, &descT, 1, 1);
START_TIMING(); START_TIMING();
......
...@@ -34,33 +34,38 @@ RunTest(int *iparam, double *dparam, chameleon_time_t *t_) ...@@ -34,33 +34,38 @@ RunTest(int *iparam, double *dparam, chameleon_time_t *t_)
LDA = chameleon_max(LDA, N); LDA = chameleon_max(LDA, N);
/* Allocate Data */ /* Allocate Data */
PASTE_CODE_ALLOCATE_MATRIX( A, 1, CHAMELEON_Complex64_t, LDA, N ); PASTE_CODE_ALLOCATE_MATRIX( A, 1, CHAMELEON_Complex64_t, LDA, N );
PASTE_CODE_ALLOCATE_MATRIX( X, 1, CHAMELEON_Complex64_t, LDB, NRHS ); PASTE_CODE_ALLOCATE_MATRIX( A2, check, CHAMELEON_Complex64_t, LDA, N );
PASTE_CODE_ALLOCATE_MATRIX( X, 1, CHAMELEON_Complex64_t, LDB, NRHS );
/* Initialiaze Data */ /* Initialize data and save A if check */
CHAMELEON_zplghe((double)N, ChamUpperLower, N, A, LDA, 51 ); if ( check ) {
CHAMELEON_zplghe( (double)N, ChamUpperLower, N, A2, LDA, 51 );
CHAMELEON_zlacpy( uplo, N, N, A2, LDA, A, LDA );
}
else {
CHAMELEON_zplghe( (double)N, uplo, N, A, LDA, 51 );
}
CHAMELEON_zplrnt( N, NRHS, X, LDB, 5673 ); CHAMELEON_zplrnt( N, NRHS, X, LDB, 5673 );
/* Save A and b */ /* Save b */
PASTE_CODE_ALLOCATE_COPY( Acpy, check, CHAMELEON_Complex64_t, A, LDA, N ); PASTE_CODE_ALLOCATE_COPY( B, check, CHAMELEON_Complex64_t, X, LDB, NRHS );
PASTE_CODE_ALLOCATE_COPY( B, check, CHAMELEON_Complex64_t, X, LDB, NRHS );
/* CHAMELEON ZPOSV */ /* CHAMELEON ZPOSV */
START_TIMING(); START_TIMING();
CHAMELEON_zposv(uplo, N, NRHS, A, LDA, X, LDB); CHAMELEON_zposv( uplo, N, NRHS, A, LDA, X, LDB );
STOP_TIMING(); STOP_TIMING();
/* Check the solution */ /* Check the solution */
if (check) if (check)
{ {
dparam[IPARAM_RES] = z_check_solution(N, N, NRHS, Acpy, LDA, B, X, LDB, dparam[IPARAM_RES] = z_check_solution( N, N, NRHS, A2, LDA, B, X, LDB,
&(dparam[IPARAM_ANORM]), &(dparam[IPARAM_ANORM]),
&(dparam[IPARAM_BNORM]), &(dparam[IPARAM_BNORM]),
&(dparam[IPARAM_XNORM])); &(dparam[IPARAM_XNORM]) );
free(Acpy); free(B); free(A2); free(B);
} }
free(A); free(X); free(A); free(X);
return 0; return 0;
} }
...@@ -38,34 +38,39 @@ RunTest(int *iparam, double *dparam, chameleon_time_t *t_) ...@@ -38,34 +38,39 @@ RunTest(int *iparam, double *dparam, chameleon_time_t *t_)
PASTE_CODE_ALLOCATE_MATRIX_TILE( descAC, check, CHAMELEON_Complex64_t, ChamComplexDouble, LDA, N, N ); PASTE_CODE_ALLOCATE_MATRIX_TILE( descAC, check, CHAMELEON_Complex64_t, ChamComplexDouble, LDA, N, N );
PASTE_CODE_ALLOCATE_MATRIX_TILE( descB, check, CHAMELEON_Complex64_t, ChamComplexDouble, LDB, N, NRHS ); PASTE_CODE_ALLOCATE_MATRIX_TILE( descB, check, CHAMELEON_Complex64_t, ChamComplexDouble, LDB, N, NRHS );
/* Initialize AT and bT for Symmetric Positif Matrix */ /* Initialize data and save A and B if check */
CHAMELEON_zplghe_Tile((double)N, ChamUpperLower, descA, 51 ); CHAMELEON_zplrnt_Tile( descX, 7672 );
CHAMELEON_zplrnt_Tile( descX, 7732 ); if ( check ) {
CHAMELEON_zplghe_Tile( (double)N, ChamUpperLower, descAC, 51 );
CHAMELEON_zlacpy_Tile( uplo, descAC, descA );
/* Save AT and bT for check */ CHAMELEON_zlacpy_Tile( ChamUpperLower, descX, descB );
if (check == 1){
CHAMELEON_zlacpy_Tile(ChamUpperLower, descA, descAC);
CHAMELEON_zlacpy_Tile(ChamUpperLower, descX, descB);
} }
else {
CHAMELEON_zplghe_Tile( (double)N, uplo, descA, 51 );
}
/* CHAMELEON ZPOSV */ /* CHAMELEON ZPOSV */
START_TIMING(); START_TIMING();
CHAMELEON_zposv_Tile(uplo, descA, descX); CHAMELEON_zposv_Tile( uplo, descA, descX );
STOP_TIMING(); STOP_TIMING();
/* Check the solution */ /* Check the solution */
if (check) if (check)
{ {
dparam[IPARAM_ANORM] = CHAMELEON_zlange_Tile(ChamInfNorm, descAC); dparam[IPARAM_ANORM] = CHAMELEON_zlange_Tile( ChamInfNorm, descAC );
dparam[IPARAM_BNORM] = CHAMELEON_zlange_Tile(ChamInfNorm, descB); dparam[IPARAM_BNORM] = CHAMELEON_zlange_Tile( ChamInfNorm, descB );
dparam[IPARAM_XNORM] = CHAMELEON_zlange_Tile(ChamInfNorm, descX); dparam[IPARAM_XNORM] = CHAMELEON_zlange_Tile( ChamInfNorm, descX );
CHAMELEON_zgemm_Tile( ChamNoTrans, ChamNoTrans, 1.0, descAC, descX, -1.0, descB ); CHAMELEON_zgemm_Tile( ChamNoTrans, ChamNoTrans, 1.0, descAC, descX, -1.0, descB );
dparam[IPARAM_RES] = CHAMELEON_zlange_Tile(ChamInfNorm, descB);
dparam[IPARAM_RES] = CHAMELEON_zlange_Tile( ChamInfNorm, descB );
PASTE_CODE_FREE_MATRIX( descAC ); PASTE_CODE_FREE_MATRIX( descAC );
PASTE_CODE_FREE_MATRIX( descB ); PASTE_CODE_FREE_MATRIX( descB );
} }
PASTE_CODE_FREE_MATRIX( descA ); PASTE_CODE_FREE_MATRIX( descA );
PASTE_CODE_FREE_MATRIX( descX ); PASTE_CODE_FREE_MATRIX( descX );
return 0; return 0;
} }
...@@ -31,40 +31,43 @@ RunTest(int *iparam, double *dparam, chameleon_time_t *t_) ...@@ -31,40 +31,43 @@ RunTest(int *iparam, double *dparam, chameleon_time_t *t_)
PASTE_CODE_IPARAM_LOCALS( iparam ); PASTE_CODE_IPARAM_LOCALS( iparam );
cham_uplo_t uplo = ChamLower; cham_uplo_t uplo = ChamLower;
LDA = chameleon_max(LDA, N); LDA = chameleon_max( LDA, N );
/* Allocate Data */ /* Allocate Data */
PASTE_CODE_ALLOCATE_MATRIX( A, 1, CHAMELEON_Complex64_t, LDA, N ); PASTE_CODE_ALLOCATE_MATRIX( A, 1, CHAMELEON_Complex64_t, LDA, N );
PASTE_CODE_ALLOCATE_MATRIX( A2, check, CHAMELEON_Complex64_t, LDA, N );
/* Initialiaze Data */ /* Initialize data and save A if check */
CHAMELEON_zplghe( (double)N, ChamUpperLower, N, A, LDA, 51 ); if ( check ) {
CHAMELEON_zplghe( (double)N, ChamUpperLower, N, A2, LDA, 51 );
/* Save A and b */ CHAMELEON_zlacpy( uplo, N, N, A2, LDA, A, LDA );
PASTE_CODE_ALLOCATE_COPY( A2, check, CHAMELEON_Complex64_t, A, LDA, N ); }
else {
CHAMELEON_zplghe( (double)N, uplo, N, A, LDA, 51 );
}
/* CHAMELEON ZPOTRF */ /* CHAMELEON ZPOTRF */
START_TIMING(); START_TIMING();
CHAMELEON_zpotrf(uplo, N, A, LDA); CHAMELEON_zpotrf( uplo, N, A, LDA );
STOP_TIMING(); STOP_TIMING();
/* Check the solution */ /* Check the solution */
if (check) if ( check )
{ {
PASTE_CODE_ALLOCATE_MATRIX( B, check, CHAMELEON_Complex64_t, LDB, NRHS ); PASTE_CODE_ALLOCATE_MATRIX( B, check, CHAMELEON_Complex64_t, LDB, NRHS );
CHAMELEON_zplrnt( N, NRHS, B, LDB, 5673 ); CHAMELEON_zplrnt( N, NRHS, B, LDB, 5673 );
PASTE_CODE_ALLOCATE_COPY( X, check, CHAMELEON_Complex64_t, B, LDB, NRHS ); PASTE_CODE_ALLOCATE_COPY( X, check, CHAMELEON_Complex64_t, B, LDB, NRHS );
CHAMELEON_zpotrs(uplo, N, NRHS, A, LDA, X, LDB); CHAMELEON_zpotrs(uplo, N, NRHS, A, LDA, X, LDB);
dparam[IPARAM_RES] = z_check_solution(N, N, NRHS, A2, LDA, B, X, LDB, dparam[IPARAM_RES] = z_check_solution( N, N, NRHS, A2, LDA, B, X, LDB,
&(dparam[IPARAM_ANORM]), &(dparam[IPARAM_ANORM]),
&(dparam[IPARAM_BNORM]), &(dparam[IPARAM_BNORM]),
&(dparam[IPARAM_XNORM])); &(dparam[IPARAM_XNORM]) );
free(A2); free(B); free(X); free(A2); free(B); free(X);
} }
free(A); free(A);
return 0; return 0;
} }
...@@ -25,30 +25,32 @@ ...@@ -25,30 +25,32 @@
#include "./timing.c" #include "./timing.c"
static int static int
RunTest(int *iparam, double *dparam, chameleon_time_t *t_) RunTest( int *iparam, double *dparam, chameleon_time_t *t_ )
{ {
PASTE_CODE_IPARAM_LOCALS( iparam ); PASTE_CODE_IPARAM_LOCALS( iparam );
cham_uplo_t uplo = ChamUpper; cham_uplo_t uplo = ChamUpper;
LDA = chameleon_max(LDA, N); LDA = chameleon_max( LDA, N );
/* Allocate Data */ /* Allocate Data */
PASTE_CODE_ALLOCATE_MATRIX_TILE( descA, 1, CHAMELEON_Complex64_t, ChamComplexDouble, LDA, N, N ); PASTE_CODE_ALLOCATE_MATRIX_TILE( descA, 1, CHAMELEON_Complex64_t, ChamComplexDouble, LDA, N, N );
PASTE_CODE_ALLOCATE_MATRIX_TILE( descB, check, CHAMELEON_Complex64_t, ChamComplexDouble, LDB, N, NRHS ); PASTE_CODE_ALLOCATE_MATRIX_TILE( descB, check, CHAMELEON_Complex64_t, ChamComplexDouble, LDB, N, NRHS );
PASTE_CODE_ALLOCATE_MATRIX_TILE( descAC, check, CHAMELEON_Complex64_t, ChamComplexDouble, LDA, N, N ); PASTE_CODE_ALLOCATE_MATRIX_TILE( descAC, check, CHAMELEON_Complex64_t, ChamComplexDouble, LDA, N, N );
PASTE_CODE_ALLOCATE_MATRIX_TILE( descX, check, CHAMELEON_Complex64_t, ChamComplexDouble, LDB, N, NRHS ); PASTE_CODE_ALLOCATE_MATRIX_TILE( descX, check, CHAMELEON_Complex64_t, ChamComplexDouble, LDB, N, NRHS );
CHAMELEON_zplghe_Tile( (double)N, uplo, descA, 51 );
/* Save A for check */ /* Initialize data and save A if check */
if (check == 1){ if ( check ) {
CHAMELEON_zlacpy_Tile(uplo, descA, descAC); CHAMELEON_zplghe_Tile( (double)N, ChamUpperLower, descAC, 51 );
CHAMELEON_zlacpy_Tile( uplo, descAC, descA );
}
else {
CHAMELEON_zplghe_Tile( (double)N, uplo, descA, 51 );
} }
//RUNTIME_zlocality_allrestrict( STARPU_CUDA ); //RUNTIME_zlocality_allrestrict( STARPU_CUDA );
/* CHAMELEON ZPOTRF */ /* CHAMELEON ZPOTRF */
START_TIMING(); START_TIMING();
CHAMELEON_zpotrf_Tile(uplo, descA); CHAMELEON_zpotrf_Tile( uplo, descA );
STOP_TIMING(); STOP_TIMING();
/* Check the solution */ /* Check the solution */
...@@ -56,17 +58,17 @@ RunTest(int *iparam, double *dparam, chameleon_time_t *t_) ...@@ -56,17 +58,17 @@ RunTest(int *iparam, double *dparam, chameleon_time_t *t_)
{ {
/* Initialize and save B */ /* Initialize and save B */
CHAMELEON_zplrnt_Tile( descB, 7672 ); CHAMELEON_zplrnt_Tile( descB, 7672 );
CHAMELEON_zlacpy_Tile(ChamUpperLower, descB, descX); CHAMELEON_zlacpy_Tile( ChamUpperLower, descB, descX );
/* Compute the solution */ /* Compute the solution */
CHAMELEON_zpotrs_Tile( uplo, descA, descX ); CHAMELEON_zpotrs_Tile( uplo, descA, descX );
/* Check solution */ /* Check solution */
dparam[IPARAM_ANORM] = CHAMELEON_zlange_Tile(ChamInfNorm, descAC); dparam[IPARAM_ANORM] = CHAMELEON_zlange_Tile( ChamInfNorm, descAC );
dparam[IPARAM_BNORM] = CHAMELEON_zlange_Tile(ChamInfNorm, descB); dparam[IPARAM_BNORM] = CHAMELEON_zlange_Tile( ChamInfNorm, descB );
dparam[IPARAM_XNORM] = CHAMELEON_zlange_Tile(ChamInfNorm, descX); dparam[IPARAM_XNORM] = CHAMELEON_zlange_Tile( ChamInfNorm, descX );
CHAMELEON_zgemm_Tile( ChamNoTrans, ChamNoTrans, 1.0, descAC, descX, -1.0, descB ); CHAMELEON_zgemm_Tile( ChamNoTrans, ChamNoTrans, 1.0, descAC, descX, -1.0, descB );
dparam[IPARAM_RES] = CHAMELEON_zlange_Tile(ChamInfNorm, descB); dparam[IPARAM_RES] = CHAMELEON_zlange_Tile( ChamInfNorm, descB );
PASTE_CODE_FREE_MATRIX( descB ); PASTE_CODE_FREE_MATRIX( descB );
PASTE_CODE_FREE_MATRIX( descAC ); PASTE_CODE_FREE_MATRIX( descAC );
......
...@@ -42,7 +42,7 @@ RunTest(int *iparam, double *dparam, chameleon_time_t *t_) ...@@ -42,7 +42,7 @@ RunTest(int *iparam, double *dparam, chameleon_time_t *t_)
* It's done in static to avoid having the same sequence as * It's done in static to avoid having the same sequence as
* the function we want to trace * the function we want to trace
*/ */
CHAMELEON_zplghe_Tile( (double)N, ChamUpperLower, descA, 51 ); CHAMELEON_zplghe_Tile( (double)N, uplo, descA, 51 );
/* CHAMELEON ZPOTRF / ZTRTRI / ZLAUUM */ /* CHAMELEON ZPOTRF / ZTRTRI / ZLAUUM */
/* /*
......
...@@ -30,51 +30,51 @@ RunTest(int *iparam, double *dparam, chameleon_time_t *t_) ...@@ -30,51 +30,51 @@ RunTest(int *iparam, double *dparam, chameleon_time_t *t_)
PASTE_CODE_IPARAM_LOCALS( iparam ); PASTE_CODE_IPARAM_LOCALS( iparam );
cham_uplo_t uplo = ChamUpper; cham_uplo_t uplo = ChamUpper;
LDA = chameleon_max(LDA, N); LDA = chameleon_max( LDA, N );
check = 1;
/* Allocate Data */ /* Allocate Data */
PASTE_CODE_ALLOCATE_MATRIX_TILE( descA, 1, CHAMELEON_Complex64_t, ChamComplexDouble, LDA, N, N ); PASTE_CODE_ALLOCATE_MATRIX_TILE( descA, 1, CHAMELEON_Complex64_t, ChamComplexDouble, LDA, N, N );
PASTE_CODE_ALLOCATE_MATRIX_TILE( descB, check, CHAMELEON_Complex64_t, ChamComplexDouble, LDB, N, NRHS ); PASTE_CODE_ALLOCATE_MATRIX_TILE( descB, 1, CHAMELEON_Complex64_t, ChamComplexDouble, LDB, N, NRHS );
PASTE_CODE_ALLOCATE_MATRIX_TILE( descAC, check, CHAMELEON_Complex64_t, ChamComplexDouble, LDA, N, N ); PASTE_CODE_ALLOCATE_MATRIX_TILE( descAC, check, CHAMELEON_Complex64_t, ChamComplexDouble, LDA, N, N );
PASTE_CODE_ALLOCATE_MATRIX_TILE( descX, check, CHAMELEON_Complex64_t, ChamComplexDouble, LDB, N, NRHS ); PASTE_CODE_ALLOCATE_MATRIX_TILE( descX, 1, CHAMELEON_Complex64_t, ChamComplexDouble, LDB, N, NRHS );
CHAMELEON_zplghe_Tile( (double)N, ChamUpperLower, descA, 51 );
/* Save A for check */ /* Initialize data and save A and B if check */
if (check == 1){ CHAMELEON_zplrnt_Tile( descX, 7672 );
CHAMELEON_zlacpy_Tile(ChamUpperLower, descA, descAC); if ( check ) {
} CHAMELEON_zplghe_Tile( (double)N, ChamUpperLower, descAC, 51 );
CHAMELEON_zlacpy_Tile( uplo, descAC, descA );
CHAMELEON_zlacpy_Tile( ChamUpperLower, descX, descB );
}
else {
CHAMELEON_zplghe_Tile( (double)N, uplo, descA, 51 );
}
//RUNTIME_zlocality_allrestrict( STARPU_CUDA ); //RUNTIME_zlocality_allrestrict( STARPU_CUDA );
/* CHAMELEON ZPOTRF */ /* CHAMELEON ZPOTRF */
CHAMELEON_zpotrf_Tile(uplo, descA); CHAMELEON_zpotrf_Tile(uplo, descA);
/* Compute the solution */
START_TIMING();
CHAMELEON_zpotrs_Tile( uplo, descA, descX );
STOP_TIMING();
/* Check the solution */ /* Check the solution */
if ( check ) if ( check )
{ {
/* Initialize and save B */
CHAMELEON_zplrnt_Tile( descB, 7672 );
CHAMELEON_zlacpy_Tile(ChamUpperLower, descB, descX);
/* Compute the solution */
START_TIMING();
CHAMELEON_zpotrs_Tile( uplo, descA, descX );
STOP_TIMING();
/* Check solution */ /* Check solution */
dparam[IPARAM_ANORM] = CHAMELEON_zlange_Tile(ChamInfNorm, descAC); dparam[IPARAM_ANORM] = CHAMELEON_zlange_Tile( ChamInfNorm, descAC );
dparam[IPARAM_BNORM] = CHAMELEON_zlange_Tile(ChamInfNorm, descB); dparam[IPARAM_BNORM] = CHAMELEON_zlange_Tile( ChamInfNorm, descB );
dparam[IPARAM_XNORM] = CHAMELEON_zlange_Tile(ChamInfNorm, descX); dparam[IPARAM_XNORM] = CHAMELEON_zlange_Tile( ChamInfNorm, descX );
CHAMELEON_zgemm_Tile( ChamNoTrans, ChamNoTrans, 1.0, descAC, descX, -1.0, descB ); CHAMELEON_zgemm_Tile( ChamNoTrans, ChamNoTrans, 1.0, descAC, descX, -1.0, descB );
dparam[IPARAM_RES] = CHAMELEON_zlange_Tile(ChamInfNorm, descB); dparam[IPARAM_RES] = CHAMELEON_zlange_Tile( ChamInfNorm, descB );
PASTE_CODE_FREE_MATRIX( descB );
PASTE_CODE_FREE_MATRIX( descAC ); PASTE_CODE_FREE_MATRIX( descAC );
PASTE_CODE_FREE_MATRIX( descX );
} }
PASTE_CODE_FREE_MATRIX( descA ); PASTE_CODE_FREE_MATRIX( descA );
PASTE_CODE_FREE_MATRIX( descX );
PASTE_CODE_FREE_MATRIX( descB );
return 0; return 0;
} }
...@@ -37,37 +37,38 @@ RunTest(int *iparam, double *dparam, chameleon_time_t *t_) ...@@ -37,37 +37,38 @@ RunTest(int *iparam, double *dparam, chameleon_time_t *t_)
PASTE_CODE_ALLOCATE_MATRIX_TILE( descB, check, CHAMELEON_Complex64_t, ChamComplexDouble, LDB, N, NRHS ); PASTE_CODE_ALLOCATE_MATRIX_TILE( descB, check, CHAMELEON_Complex64_t, ChamComplexDouble, LDB, N, NRHS );
PASTE_CODE_ALLOCATE_MATRIX_TILE( descAC, check, CHAMELEON_Complex64_t, ChamComplexDouble, LDA, N, N ); PASTE_CODE_ALLOCATE_MATRIX_TILE( descAC, check, CHAMELEON_Complex64_t, ChamComplexDouble, LDA, N, N );
PASTE_CODE_ALLOCATE_MATRIX_TILE( descX, check, CHAMELEON_Complex64_t, ChamComplexDouble, LDB, N, NRHS ); PASTE_CODE_ALLOCATE_MATRIX_TILE( descX, check, CHAMELEON_Complex64_t, ChamComplexDouble, LDB, N, NRHS );
CHAMELEON_zplgsy_Tile( (double)N, ChamUpperLower, descA, 51 );
/* Save A for check */ /* Initialize data and save A if check */
if (check == 1){ if ( check ) {
CHAMELEON_zlacpy_Tile(ChamUpperLower, descA, descAC); CHAMELEON_zplgsy_Tile( (double)N, ChamUpperLower, descAC, 51 );
CHAMELEON_zlacpy_Tile( uplo, descAC, descA );
}
else {
CHAMELEON_zplgsy_Tile( (double)N, uplo, descA, 51 );
} }
/* CHAMELEON ZSYTRF */ /* CHAMELEON ZSYTRF */
START_TIMING(); START_TIMING();
CHAMELEON_zsytrf_Tile(uplo, descA); CHAMELEON_zsytrf_Tile( uplo, descA );
STOP_TIMING(); STOP_TIMING();
/* Check the solution */ /* Check the solution */
if ( check ) if ( check )
{ {
CHAMELEON_zplrnt_Tile( descB, 7672 ); CHAMELEON_zplrnt_Tile( descB, 7672 );
CHAMELEON_zlacpy_Tile(ChamUpperLower, descB, descX); CHAMELEON_zlacpy_Tile( ChamUpperLower, descB, descX );
CHAMELEON_zsytrs_Tile( uplo, descA, descX ); CHAMELEON_zsytrs_Tile( uplo, descA, descX );
dparam[IPARAM_ANORM] = CHAMELEON_zlange_Tile(ChamInfNorm, descAC); dparam[IPARAM_ANORM] = CHAMELEON_zlange_Tile( ChamInfNorm, descAC );
dparam[IPARAM_BNORM] = CHAMELEON_zlange_Tile(ChamInfNorm, descB); dparam[IPARAM_BNORM] = CHAMELEON_zlange_Tile( ChamInfNorm, descB );
dparam[IPARAM_XNORM] = CHAMELEON_zlange_Tile(ChamInfNorm, descX); dparam[IPARAM_XNORM] = CHAMELEON_zlange_Tile( ChamInfNorm, descX );
CHAMELEON_zgemm_Tile( ChamNoTrans, ChamNoTrans, 1.0, descAC, descX, -1.0, descB ); CHAMELEON_zgemm_Tile( ChamNoTrans, ChamNoTrans, 1.0, descAC, descX, -1.0, descB );
dparam[IPARAM_RES] = CHAMELEON_zlange_Tile(ChamInfNorm, descB); dparam[IPARAM_RES] = CHAMELEON_zlange_Tile( ChamInfNorm, descB );
PASTE_CODE_FREE_MATRIX( descB ); PASTE_CODE_FREE_MATRIX( descB );
PASTE_CODE_FREE_MATRIX( descAC ); PASTE_CODE_FREE_MATRIX( descAC );
PASTE_CODE_FREE_MATRIX( descX ); PASTE_CODE_FREE_MATRIX( descX );
} }
PASTE_CODE_FREE_MATRIX( descA ); PASTE_CODE_FREE_MATRIX( descA );
return 0; return 0;
} }
...@@ -29,6 +29,8 @@ static int ...@@ -29,6 +29,8 @@ static int
RunTest(int *iparam, double *dparam, chameleon_time_t *t_) RunTest(int *iparam, double *dparam, chameleon_time_t *t_)
{ {
CHAMELEON_Complex64_t alpha; CHAMELEON_Complex64_t alpha;
cham_uplo_t uplo = ChamLower;
PASTE_CODE_IPARAM_LOCALS( iparam ); PASTE_CODE_IPARAM_LOCALS( iparam );
LDA = chameleon_max( LDA, N ); LDA = chameleon_max( LDA, N );
...@@ -38,26 +40,25 @@ RunTest(int *iparam, double *dparam, chameleon_time_t *t_) ...@@ -38,26 +40,25 @@ RunTest(int *iparam, double *dparam, chameleon_time_t *t_)
PASTE_CODE_ALLOCATE_MATRIX( B, 1, CHAMELEON_Complex64_t, LDB, NRHS); PASTE_CODE_ALLOCATE_MATRIX( B, 1, CHAMELEON_Complex64_t, LDB, NRHS);
PASTE_CODE_ALLOCATE_MATRIX( B2, check, CHAMELEON_Complex64_t, LDB, NRHS); PASTE_CODE_ALLOCATE_MATRIX( B2, check, CHAMELEON_Complex64_t, LDB, NRHS);
/* Initialiaze Data */ /* Initialize Data */
CHAMELEON_zplgsy( (CHAMELEON_Complex64_t)N, ChamUpperLower, N, A, LDA, 453 ); CHAMELEON_zplgsy( (CHAMELEON_Complex64_t)N, uplo, N, A, LDA, 453 );
CHAMELEON_zplrnt( N, NRHS, B, LDB, 5673 ); CHAMELEON_zplrnt( N, NRHS, B, LDB, 5673 );
LAPACKE_zlarnv_work(1, ISEED, 1, &alpha); LAPACKE_zlarnv_work(1, ISEED, 1, &alpha);
alpha = 10.; /*alpha * N / 2.;*/ alpha = 10.; /*alpha * N / 2.;*/