Commit 7b385691 authored by PRUVOST Florent

set a list of compile definitions

parent a16b1fb4
......@@ -60,6 +60,8 @@ include(GenPkgConfig)
# Parameters/Options #
######################
set(CHAMELEON_DEFINITIONS_LIST "")
# Add define for Fortran Mangling (should be defined somewhere else)
# ------------------------------------------------------------------
add_definitions(-DADD_)
......@@ -686,6 +688,7 @@ endif()
###############################################################################
# Print Options #
#################
get_directory_property( CHAMELEON_DEFINITIONS_LIST DIRECTORY ${CMAKE_SOURCE_DIR} COMPILE_DEFINITIONS )
include(PrintOpts)
###
......
......@@ -121,27 +121,30 @@ static void cl_zgemm_cuda_func(void *descr[], void *cl_arg)
cuDoubleComplex beta;
cuDoubleComplex *C;
int ldc;
CUstream stream;
cublasHandle_t handle;
cublasStatus_t stat;
cublasOperation_t cublasTransA;
cublasOperation_t cublasTransB;
A = (const cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[0]);
B = (const cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[1]);
C = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[2]);
starpu_codelet_unpack_args(cl_arg, &transA, &transB, &m, &n, &k, &alpha, &lda, &ldb, &beta, &ldc);
cublasHandle_t handle;
cublasStatus_t stat = cublasCreate(&handle);
stat = cublasCreate(&handle);
if (stat != CUBLAS_STATUS_SUCCESS) {
printf ("CUBLAS initialization failed\n");
assert( stat == CUBLAS_STATUS_SUCCESS );
}
CUstream stream = starpu_cuda_get_local_stream();
stream = starpu_cuda_get_local_stream();
stat = cublasSetStream(handle, stream);
if (stat != CUBLAS_STATUS_SUCCESS) {
printf ("cublasSetStream failed\n");
assert( stat == CUBLAS_STATUS_SUCCESS );
}
cublasOperation_t cublasTransA;
if (transA == MorseNoTrans){
cublasTransA = CUBLAS_OP_N;
}else if(transA == MorseTrans){
......@@ -151,7 +154,6 @@ static void cl_zgemm_cuda_func(void *descr[], void *cl_arg)
}else{
fprintf(stderr, "Error in cl_zgemm_cuda_func: bad transA parameter %d\n", transA);
}
cublasOperation_t cublasTransB;
if (transB == MorseNoTrans){
cublasTransB = CUBLAS_OP_N;
}else if(transB == MorseTrans){
......
......@@ -162,14 +162,17 @@ magma_zgemerge_gpu(magma_side_t side, magma_diag_t diag,
{
int i, j;
magmaDoubleComplex *cola, *colb;
CUstream stream;
cublasHandle_t handle;
cublasStatus_t stat = cublasCreate(&handle);
cublasStatus_t stat;
stat = cublasCreate(&handle);
if (stat != CUBLAS_STATUS_SUCCESS) {
printf ("CUBLAS initialization failed\n");
assert( stat == CUBLAS_STATUS_SUCCESS );
}
CUstream stream = starpu_cuda_get_local_stream();
stream = starpu_cuda_get_local_stream();
stat = cublasSetStream(handle, stream);
if (stat != CUBLAS_STATUS_SUCCESS) {
printf ("cublasSetStream failed\n");
......@@ -234,13 +237,19 @@ magma_zgemerge_gpu(magma_side_t side, magma_diag_t diag,
for(i=0; i<N; i++){
cola = A + i*LDA;
colb = B + i*LDB;
cublasZcopy(i+1, cola, 1, colb, 1);
//cublasZcopy(i+1, cola, 1, colb, 1);
cudaMemcpy(colb , cola,
(i+1)*sizeof(cuDoubleComplex),
cudaMemcpyDeviceToDevice );
}
}else{
for(i=0; i<N; i++){
cola = A + i*LDA;
colb = B + i*LDB;
cublasZcopy(M-i, cola + i, 1, colb + i, 1);
//cublasZcopy(M-i, cola + i, 1, colb + i, 1);
cudaMemcpy(colb+i , cola+i,
(M-i)*sizeof(cuDoubleComplex),
cudaMemcpyDeviceToDevice );
}
}
......@@ -291,9 +300,13 @@ magma_zgeqrt_gpu( magma_int_t m, magma_int_t n, magma_int_t nb,
cudaMemset(dt_ref(0,0), 0, nb*n*sizeof(magmaDoubleComplex));
/* copy first panel of A on the host */
cublasGetMatrix(m, min(nb,n), sizeof(magmaDoubleComplex),
da_ref(0, 0), ldda,
v, ldv);
// cublasGetMatrix(m, min(nb,n), sizeof(magmaDoubleComplex),
// da_ref(0, 0), ldda,
// v, ldv);
/* copy first panel of A on the host */
cudaMemcpy( v, da_ref(0,0),
m*min(nb,n)*sizeof(magmaDoubleComplex),
cudaMemcpyDeviceToHost );
/* Use blocked code initially */
for (i = 0; i < k; i += nb) {
......@@ -305,9 +318,13 @@ magma_zgeqrt_gpu( magma_int_t m, magma_int_t n, magma_int_t nb,
if (i>0){
/* copy panel of A from device to host */
cublasGetMatrix(m, ib, sizeof(magmaDoubleComplex),
da_ref(0, i), ldda,
v, ldv);
// cublasGetMatrix(m, ib, sizeof(magmaDoubleComplex),
// da_ref(0, i), ldda,
// v, ldv);
/* copy panel of A from device to host */
cudaMemcpy( v, da_ref(0,i),
m*ib*sizeof(magmaDoubleComplex),
cudaMemcpyDeviceToHost );
/* Apply H' to A(i:m,i+2*ib:n) from the left */
cols = n-old_i-2*old_ib;
......
......@@ -117,27 +117,30 @@ static void cl_zhemm_cuda_func(void *descr[], void *cl_arg)
cuDoubleComplex beta;
cuDoubleComplex *C;
int LDC;
CUstream stream;
cublasHandle_t handle;
cublasStatus_t stat;
cublasSideMode_t cublasSide;
cublasFillMode_t cublasUplo;
A = (const cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[0]);
B = (const cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[1]);
C = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[2]);
starpu_codelet_unpack_args(cl_arg, &side, &uplo, &M, &N, &alpha, &LDA, &LDB, &beta, &LDC);
cublasHandle_t handle;
cublasStatus_t stat = cublasCreate(&handle);
stat = cublasCreate(&handle);
if (stat != CUBLAS_STATUS_SUCCESS) {
printf ("CUBLAS initialization failed\n");
assert( stat == CUBLAS_STATUS_SUCCESS );
}
CUstream stream = starpu_cuda_get_local_stream();
stream = starpu_cuda_get_local_stream();
stat = cublasSetStream(handle, stream);
if (stat != CUBLAS_STATUS_SUCCESS) {
printf ("cublasSetStream failed\n");
assert( stat == CUBLAS_STATUS_SUCCESS );
}
cublasSideMode_t cublasSide;
if (side == MorseLeft){
cublasSide = CUBLAS_SIDE_LEFT;
}else if (side == MorseRight){
......@@ -145,7 +148,6 @@ static void cl_zhemm_cuda_func(void *descr[], void *cl_arg)
}else{
fprintf(stderr, "Error in cl_zhemm_cuda_func: bad side parameter %d\n", side);
}
cublasFillMode_t cublasUplo;
if (uplo == MorseUpper){
cublasUplo = CUBLAS_FILL_MODE_UPPER;
}else if(uplo == MorseLower){
......
......@@ -112,27 +112,30 @@ static void cl_zher2k_cuda_func(void *descr[], void *cl_arg)
double beta;
cuDoubleComplex *C;
int ldc;
CUstream stream;
cublasHandle_t handle;
cublasStatus_t stat;
cublasFillMode_t cublasUplo;
cublasOperation_t cublasTrans;
A = (const cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[0]);
B = (const cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[1]);
C = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[2]);
starpu_codelet_unpack_args(cl_arg, &uplo, &trans, &n, &k, &alpha, &lda, &ldb, &beta, &ldc);
cublasHandle_t handle;
cublasStatus_t stat = cublasCreate(&handle);
stat = cublasCreate(&handle);
if (stat != CUBLAS_STATUS_SUCCESS) {
printf ("CUBLAS initialization failed\n");
assert( stat == CUBLAS_STATUS_SUCCESS );
}
CUstream stream = starpu_cuda_get_local_stream();
stream = starpu_cuda_get_local_stream();
stat = cublasSetStream(handle, stream);
if (stat != CUBLAS_STATUS_SUCCESS) {
printf ("cublasSetStream failed\n");
assert( stat == CUBLAS_STATUS_SUCCESS );
}
cublasFillMode_t cublasUplo;
if (uplo == MorseUpper){
cublasUplo = CUBLAS_FILL_MODE_UPPER;
}else if(uplo == MorseLower){
......@@ -142,8 +145,6 @@ static void cl_zher2k_cuda_func(void *descr[], void *cl_arg)
}else{
fprintf(stderr, "Error in cl_zher2k_cuda_func: bad uplo parameter %d\n", uplo);
}
cublasOperation_t cublasTrans;
if (trans == MorseNoTrans){
cublasTrans = CUBLAS_OP_N;
}else if(trans == MorseTrans){
......
......@@ -106,26 +106,29 @@ static void cl_zherk_cuda_func(void *descr[], void *cl_arg)
double beta;
cuDoubleComplex *C;
int ldc;
CUstream stream;
cublasHandle_t handle;
cublasStatus_t stat;
cublasFillMode_t cublasUplo;
cublasOperation_t cublasTrans;
A = (const cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[0]);
C = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[1]);
starpu_codelet_unpack_args(cl_arg, &uplo, &trans, &n, &k, &alpha, &lda, &beta, &ldc);
cublasHandle_t handle;
cublasStatus_t stat = cublasCreate(&handle);
stat = cublasCreate(&handle);
if (stat != CUBLAS_STATUS_SUCCESS) {
printf ("CUBLAS initialization failed\n");
assert( stat == CUBLAS_STATUS_SUCCESS );
}
CUstream stream = starpu_cuda_get_local_stream();
stream = starpu_cuda_get_local_stream();
stat = cublasSetStream(handle, stream);
if (stat != CUBLAS_STATUS_SUCCESS) {
printf ("cublasSetStream failed\n");
assert( stat == CUBLAS_STATUS_SUCCESS );
}
cublasFillMode_t cublasUplo;
if (uplo == MorseUpper){
cublasUplo = CUBLAS_FILL_MODE_UPPER;
}else if(uplo == MorseLower){
......@@ -135,8 +138,6 @@ static void cl_zherk_cuda_func(void *descr[], void *cl_arg)
}else{
fprintf(stderr, "Error in cl_zherk_cuda_func: bad uplo parameter %d\n", uplo);
}
cublasOperation_t cublasTrans;
if (trans == MorseNoTrans){
cublasTrans = CUBLAS_OP_N;
}else if(trans == MorseTrans){
......
......@@ -117,27 +117,30 @@ static void cl_zsymm_cuda_func(void *descr[], void *cl_arg)
cuDoubleComplex beta;
cuDoubleComplex *C;
int LDC;
CUstream stream;
cublasHandle_t handle;
cublasStatus_t stat;
cublasSideMode_t cublasSide;
cublasFillMode_t cublasUplo;
A = (const cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[0]);
B = (const cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[1]);
C = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[2]);
starpu_codelet_unpack_args(cl_arg, &side, &uplo, &M, &N, &alpha, &LDA, &LDB, &beta, &LDC);
cublasHandle_t handle;
cublasStatus_t stat = cublasCreate(&handle);
stat = cublasCreate(&handle);
if (stat != CUBLAS_STATUS_SUCCESS) {
printf ("CUBLAS initialization failed\n");
assert( stat == CUBLAS_STATUS_SUCCESS );
}
CUstream stream = starpu_cuda_get_local_stream();
stream = starpu_cuda_get_local_stream();
stat = cublasSetStream(handle, stream);
if (stat != CUBLAS_STATUS_SUCCESS) {
printf ("cublasSetStream failed\n");
assert( stat == CUBLAS_STATUS_SUCCESS );
}
cublasSideMode_t cublasSide;
if (side == MorseLeft){
cublasSide = CUBLAS_SIDE_LEFT;
}else if (side == MorseRight){
......@@ -145,7 +148,6 @@ static void cl_zsymm_cuda_func(void *descr[], void *cl_arg)
}else{
fprintf(stderr, "Error in cl_zsymm_cuda_func: bad side parameter %d\n", side);
}
cublasFillMode_t cublasUplo;
if (uplo == MorseUpper){
cublasUplo = CUBLAS_FILL_MODE_UPPER;
}else if(uplo == MorseLower){
......
......@@ -112,27 +112,30 @@ static void cl_zsyr2k_cuda_func(void *descr[], void *cl_arg)
cuDoubleComplex beta;
cuDoubleComplex *C;
int ldc;
CUstream stream;
cublasHandle_t handle;
cublasStatus_t stat;
cublasFillMode_t cublasUplo;
cublasOperation_t cublasTrans;
A = (const cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[0]);
B = (const cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[1]);
C = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[2]);
starpu_codelet_unpack_args(cl_arg, &uplo, &trans, &n, &k, &alpha, &lda, &ldb, &beta, &ldc);
cublasHandle_t handle;
cublasStatus_t stat = cublasCreate(&handle);
stat = cublasCreate(&handle);
if (stat != CUBLAS_STATUS_SUCCESS) {
printf ("CUBLAS initialization failed\n");
assert( stat == CUBLAS_STATUS_SUCCESS );
}
CUstream stream = starpu_cuda_get_local_stream();
stream = starpu_cuda_get_local_stream();
stat = cublasSetStream(handle, stream);
if (stat != CUBLAS_STATUS_SUCCESS) {
printf ("cublasSetStream failed\n");
assert( stat == CUBLAS_STATUS_SUCCESS );
}
cublasFillMode_t cublasUplo;
if (uplo == MorseUpper){
cublasUplo = CUBLAS_FILL_MODE_UPPER;
}else if(uplo == MorseLower){
......@@ -142,8 +145,6 @@ static void cl_zsyr2k_cuda_func(void *descr[], void *cl_arg)
}else{
fprintf(stderr, "Error in cl_zsyr2k_cuda_func: bad uplo parameter %d\n", uplo);
}
cublasOperation_t cublasTrans;
if (trans == MorseNoTrans){
cublasTrans = CUBLAS_OP_N;
}else if(trans == MorseTrans){
......
......@@ -107,26 +107,29 @@ static void cl_zsyrk_cuda_func(void *descr[], void *cl_arg)
cuDoubleComplex beta;
cuDoubleComplex *C;
int ldc;
CUstream stream;
cublasHandle_t handle;
cublasStatus_t stat;
cublasFillMode_t cublasUplo;
cublasOperation_t cublasTrans;
A = (const cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[0]);
C = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[1]);
starpu_codelet_unpack_args(cl_arg, &uplo, &trans, &n, &k, &alpha, &lda, &beta, &ldc);
cublasHandle_t handle;
cublasStatus_t stat = cublasCreate(&handle);
stat = cublasCreate(&handle);
if (stat != CUBLAS_STATUS_SUCCESS) {
printf ("CUBLAS initialization failed\n");
assert( stat == CUBLAS_STATUS_SUCCESS );
}
CUstream stream = starpu_cuda_get_local_stream();
stream = starpu_cuda_get_local_stream();
stat = cublasSetStream(handle, stream);
if (stat != CUBLAS_STATUS_SUCCESS) {
printf ("cublasSetStream failed\n");
assert( stat == CUBLAS_STATUS_SUCCESS );
}
cublasFillMode_t cublasUplo;
if (uplo == MorseUpper){
cublasUplo = CUBLAS_FILL_MODE_UPPER;
}else if(uplo == MorseLower){
......@@ -136,8 +139,6 @@ static void cl_zsyrk_cuda_func(void *descr[], void *cl_arg)
}else{
fprintf(stderr, "Error in cl_zsyrk_cuda_func: bad uplo parameter %d\n", uplo);
}
cublasOperation_t cublasTrans;
if (trans == MorseNoTrans){
cublasTrans = CUBLAS_OP_N;
}else if(trans == MorseTrans){
......
......@@ -111,26 +111,31 @@ static void cl_ztrmm_cuda_func(void *descr[], void *cl_arg)
int LDA;
cuDoubleComplex *B;
int LDB;
CUstream stream;
cublasHandle_t handle;
cublasStatus_t stat;
cublasSideMode_t cublasSide;
cublasFillMode_t cublasUplo;
cublasOperation_t cublasTransA;
cublasDiagType_t cublasDiag;
A = (const cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[0]);
B = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[1]);
starpu_codelet_unpack_args(cl_arg, &side, &uplo, &transA, &diag, &M, &N, &alpha, &LDA, &LDB);
cublasHandle_t handle;
cublasStatus_t stat = cublasCreate(&handle);
stat = cublasCreate(&handle);
if (stat != CUBLAS_STATUS_SUCCESS) {
printf ("CUBLAS initialization failed\n");
assert( stat == CUBLAS_STATUS_SUCCESS );
}
CUstream stream = starpu_cuda_get_local_stream();
stream = starpu_cuda_get_local_stream();
stat = cublasSetStream(handle, stream);
if (stat != CUBLAS_STATUS_SUCCESS) {
printf ("cublasSetStream failed\n");
assert( stat == CUBLAS_STATUS_SUCCESS );
}
cublasSideMode_t cublasSide;
if (side == MorseLeft){
cublasSide = CUBLAS_SIDE_LEFT;
}else if (side == MorseRight){
......@@ -138,7 +143,6 @@ static void cl_ztrmm_cuda_func(void *descr[], void *cl_arg)
}else{
fprintf(stderr, "Error in cl_ztrmm_cuda_func: bad side parameter %d\n", side);
}
cublasFillMode_t cublasUplo;
if (uplo == MorseUpper){
cublasUplo = CUBLAS_FILL_MODE_UPPER;
}else if(uplo == MorseLower){
......@@ -148,7 +152,6 @@ static void cl_ztrmm_cuda_func(void *descr[], void *cl_arg)
}else{
fprintf(stderr, "Error in cl_ztrmm_cuda_func: bad uplo parameter %d\n", uplo);
}
cublasOperation_t cublasTransA;
if (transA == MorseNoTrans){
cublasTransA = CUBLAS_OP_N;
}else if(transA == MorseTrans){
......@@ -158,7 +161,6 @@ static void cl_ztrmm_cuda_func(void *descr[], void *cl_arg)
}else{
fprintf(stderr, "Error in cl_ztrmm_cuda_func: bad transA parameter %d\n", transA);
}
cublasDiagType_t cublasDiag;
if (diag == MorseNonUnit){
cublasDiag = CUBLAS_DIAG_NON_UNIT;
}else if(diag == MorseUnit){
......
......@@ -111,26 +111,31 @@ static void cl_ztrsm_cuda_func(void *descr[], void *cl_arg)
int lda;
cuDoubleComplex *B;
int ldb;
CUstream stream;
cublasHandle_t handle;
cublasStatus_t stat;
cublasSideMode_t cublasSide;
cublasFillMode_t cublasUplo;
cublasOperation_t cublasTransA;
cublasDiagType_t cublasDiag;
A = (const cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[0]);
B = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[1]);
starpu_codelet_unpack_args(cl_arg, &side, &uplo, &transA, &diag, &m, &n, &alpha, &lda, &ldb);
cublasHandle_t handle;
cublasStatus_t stat = cublasCreate(&handle);
stat = cublasCreate(&handle);
if (stat != CUBLAS_STATUS_SUCCESS) {
printf ("CUBLAS initialization failed\n");
assert( stat == CUBLAS_STATUS_SUCCESS );
}
CUstream stream = starpu_cuda_get_local_stream();
stream = starpu_cuda_get_local_stream();
stat = cublasSetStream(handle, stream);
if (stat != CUBLAS_STATUS_SUCCESS) {
printf ("cublasSetStream failed\n");
assert( stat == CUBLAS_STATUS_SUCCESS );
}
cublasSideMode_t cublasSide;
if (side == MorseLeft){
cublasSide = CUBLAS_SIDE_LEFT;
}else if (side == MorseRight){
......@@ -138,7 +143,6 @@ static void cl_ztrsm_cuda_func(void *descr[], void *cl_arg)
}else{
fprintf(stderr, "Error in cl_ztrsm_cuda_func: bad side parameter %d\n", side);
}
cublasFillMode_t cublasUplo;
if (uplo == MorseUpper){
cublasUplo = CUBLAS_FILL_MODE_UPPER;
}else if(uplo == MorseLower){
......@@ -148,7 +152,6 @@ static void cl_ztrsm_cuda_func(void *descr[], void *cl_arg)
}else{
fprintf(stderr, "Error in cl_ztrsm_cuda_func: bad uplo parameter %d\n", uplo);
}
cublasOperation_t cublasTransA;
if (transA == MorseNoTrans){
cublasTransA = CUBLAS_OP_N;
}else if(transA == MorseTrans){
......@@ -158,7 +161,6 @@ static void cl_ztrsm_cuda_func(void *descr[], void *cl_arg)
}else{
fprintf(stderr, "Error in cl_ztrsm_cuda_func: bad transA parameter %d\n", transA);
}
cublasDiagType_t cublasDiag;
if (diag == MorseNonUnit){
cublasDiag = CUBLAS_DIAG_NON_UNIT;
}else if(diag == MorseUnit){
......
......@@ -245,7 +245,12 @@ magma_zparfb_gpu(magma_side_t side, magma_trans_t trans,
magma_trans_t transW;
magma_trans_t transA2;
cublasHandle_t handle;
cublasStatus_t stat = cublasCreate(&handle);
cublasStatus_t stat;
cublasOperation_t cublasTrans;
cublasOperation_t cublasTransW;
cublasOperation_t cublasTransA2;
stat = cublasCreate(&handle);
if (stat != CUBLAS_STATUS_SUCCESS) {
printf ("CUBLAS initialization failed\n");
assert( stat == CUBLAS_STATUS_SUCCESS );
......@@ -257,7 +262,6 @@ magma_zparfb_gpu(magma_side_t side, magma_trans_t trans,
assert( stat == CUBLAS_STATUS_SUCCESS );
}
cublasOperation_t cublasTrans;
if (trans == MagmaNoTrans){
cublasTrans = CUBLAS_OP_N;
}else if(trans == MagmaTrans){
......@@ -329,7 +333,6 @@ magma_zparfb_gpu(magma_side_t side, magma_trans_t trans,
transW = storev == MorseColumnwise ? MagmaConjTrans : MagmaNoTrans;
transA2 = storev == MorseColumnwise ? MagmaNoTrans : MagmaConjTrans;
cublasOperation_t cublasTransW;
if (transW == MagmaNoTrans){
cublasTransW = CUBLAS_OP_N;
}else if(transW == MagmaTrans){
......@@ -339,7 +342,6 @@ magma_zparfb_gpu(magma_side_t side, magma_trans_t trans,
}else{
fprintf(stderr, "Error in magma_zparfb_gpu: bad transW parameter %d\n", transW);
}
cublasOperation_t cublasTransA2;
if (transA2 == MagmaNoTrans){
cublasTransA2 = CUBLAS_OP_N;
}else if(transA2 == MagmaTrans){
......@@ -358,7 +360,6 @@ magma_zparfb_gpu(magma_side_t side, magma_trans_t trans,
(const cuDoubleComplex *) &zone,
(cuDoubleComplex*)WORK /* K*N1 */, LDWORK);
WORKC = NULL;
if (WORKC == NULL) {
/* W = op(T) * W */
cublasZtrmm( handle,
......@@ -437,7 +438,6 @@ magma_zparfb_gpu(magma_side_t side, magma_trans_t trans,
transW = storev == MorseColumnwise ? MagmaNoTrans : MagmaConjTrans;
transA2 = storev == MorseColumnwise ? MagmaConjTrans : MagmaNoTrans;
cublasOperation_t cublasTransW;
if (transW == MagmaNoTrans){
cublasTransW = CUBLAS_OP_N;
}else if(transW == MagmaTrans){
......@@ -447,7 +447,6 @@ magma_zparfb_gpu(magma_side_t side, magma_trans_t trans,
}else{
fprintf(stderr, "Error in magma_zparfb_gpu: bad transW parameter %d\n", transW);
}
cublasOperation_t cublasTransA2;
if (transA2 == MagmaNoTrans){
cublasTransA2 = CUBLAS_OP_N;
}else if(transA2 == MagmaTrans){
......@@ -466,7 +465,6 @@ magma_zparfb_gpu(magma_side_t side, magma_trans_t trans,
(const cuDoubleComplex *) &zone,
(cuDoubleComplex*)WORK /* M1*K */, LDWORK);
WORKC = NULL;
if (WORKC == NULL) {
/* W = W * op(T) */
cublasZtrmm( handle,
......@@ -548,7 +546,6 @@ magma_zparfb_gpu(magma_side_t side, magma_trans_t trans,
magmaDoubleComplex *WORK, magma_int_t LDWORK,
magmaDoubleComplex *WORKC, magma_int_t LDWORKC,
CUstream stream)
{
#if defined(PRECISION_z) || defined(PRECISION_c)
cuDoubleComplex zzero = make_cuDoubleComplex(0.0, 0.0);
......
......@@ -213,11 +213,17 @@ magma_ztsqrt2_gpu( magma_int_t m, magma_int_t n, magma_int_t nb,
cublasGetMatrix(nb, nb, sizeof(magmaDoubleComplex),
da1_ref(0, 0), ldda1,
d, ldd);
// cudaMemcpy( d, da1_ref(0,0),
// nb*nb*sizeof(cuDoubleComplex),
// cudaMemcpyDeviceToHost );
/* copy first panel of A2 from device to host: da2 -> a2 */
cublasGetMatrix(m, nb, sizeof(magmaDoubleComplex),
da2_ref(0, 0), ldda2,
a2, lda2);
// cublasGetMatrix(m, nb, sizeof(magmaDoubleComplex),
// da2_ref(0, 0), ldda2,
// a2, lda2);
cudaMemcpy( a2, da2_ref(0, 0),
m*nb*sizeof(cuDoubleComplex),
cudaMemcpyDeviceToHost );
/* This is only blocked code for now */
for (i = 0; i < n; i += nb) {
......@@ -234,11 +240,17 @@ magma_ztsqrt2_gpu( magma_int_t m, magma_int_t n, magma_int_t nb,
cublasGetMatrix(ib, ib, sizeof(magmaDoubleComplex),
da1_ref(i, i), ldda1,
d, ldd);
// cudaMemcpy( d, da1_ref(i,i),
// ib*ib*sizeof(cuDoubleComplex),
// cudaMemcpyDeviceToHost );
/* copy panel of A2 from device to host: da2 -> a2 */
cublasGetMatrix(rows, ib, sizeof(magmaDoubleComplex),
da2_ref(0, i), ldda2,
a2, lda2);
// cudaMemcpy( a2, da2_ref(0,i),
// rows*ib*sizeof(cuDoubleComplex),
// cudaMemcpyDeviceToHost );
/* Apply H' to A(i:m,i+2*ib:n) from the left */
cols = n-old_i-2*old_ib;
......@@ -268,16 +280,25 @@ magma_ztsqrt2_gpu( magma_int_t m, magma_int_t n, magma_int_t nb,
cublasSetMatrix(rows, ib, sizeof(magmaDoubleComplex),
a2, lda2,
da2_ref(0, i), ldda2);
// cudaMemcpy( da2_ref(0,i), a2,
// rows*ib*sizeof(cuDoubleComplex),
// cudaMemcpyHostToDevice );
/* Send the triangular factor T from hwork to the GPU */
cublasSetMatrix(ib, ib, sizeof(magmaDoubleComplex),
t, ldt,
dt_ref(0, i), lddt);
// cudaMemcpy( dt_ref(0,i), t,
// ib*ib*sizeof(cuDoubleComplex),
// cudaMemcpyHostToDevice );
/* get back the diag tile in A1 from host to device: d -> da1 */
cublasSetMatrix(ib, ib, sizeof(magmaDoubleComplex),
d, ldd,
da1_ref(i, i), ldda1);
// cudaMemcpy( da1_ref(i, i), d,
// ib*ib*sizeof(cuDoubleComplex),
// cudaMemcpyHostToDevice );
/* tsmqr update on one panel forward (look ahead 1) */
if (i + ib < n) {
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment