Newer
Older
/**
*

PRUVOST Florent
committed
* @copyright (c) 2009-2014 The University of Tennessee and The University
* of Tennessee Research Foundation.
* All rights reserved.
* @copyright (c) 2012-2014 Inria. All rights reserved.

PRUVOST Florent
committed
* @copyright (c) 2012-2014 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, Univ. Bordeaux. All rights reserved.
*
**/
/**
*
* @file codelet_zhemm.c
*
* MORSE codelets kernel
* MORSE is a software package provided by Univ. of Tennessee,
* Univ. of California Berkeley and Univ. of Colorado Denver
*
* @version 2.5.0
* @comment This file has been automatically generated
* from Plasma 2.5.0 for MORSE 1.0.0
* @author Hatem Ltaief
* @author Jakub Kurzak
* @author Mathieu Faverge
* @author Emmanuel Agullo
* @author Cedric Castagnede
* @date 2010-11-15
* @precisions normal z -> c
*
**/

PRUVOST Florent
committed
#include "runtime/starpu/include/morse_starpu.h"

PRUVOST Florent
committed
#include "runtime/starpu/include/runtime_codelet_z.h"
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
/**
*
* @ingroup CORE_MORSE_Complex64_t
*
**/
void MORSE_TASK_zhemm(MORSE_option_t *options,
MORSE_enum side, MORSE_enum uplo,
int m, int n, int nb,
MORSE_Complex64_t alpha, MORSE_desc_t *A, int Am, int An, int lda,
MORSE_desc_t *B, int Bm, int Bn, int ldb,
MORSE_Complex64_t beta, MORSE_desc_t *C, int Cm, int Cn, int ldc)
{
(void)nb;
struct starpu_codelet *codelet = &cl_zhemm;
void (*callback)(void*) = options->profiling ? cl_zhemm_callback : NULL;
if ( morse_desc_islocal( A, Am, An ) ||
morse_desc_islocal( B, Bm, Bn ) ||
morse_desc_islocal( C, Cm, Cn ) )
{
starpu_insert_task(
codelet,
STARPU_VALUE, &side, sizeof(MORSE_enum),
STARPU_VALUE, &uplo, sizeof(MORSE_enum),
STARPU_VALUE, &m, sizeof(int),
STARPU_VALUE, &n, sizeof(int),
STARPU_VALUE, &alpha, sizeof(MORSE_Complex64_t),
STARPU_R, RTBLKADDR(A, MORSE_Complex64_t, Am, An),
STARPU_VALUE, &lda, sizeof(int),
STARPU_R, RTBLKADDR(B, MORSE_Complex64_t, Bm, Bn),
STARPU_VALUE, &ldb, sizeof(int),
STARPU_VALUE, &beta, sizeof(MORSE_Complex64_t),
STARPU_RW, RTBLKADDR(C, MORSE_Complex64_t, Cm, Cn),
STARPU_VALUE, &ldc, sizeof(int),
STARPU_PRIORITY, options->priority,
STARPU_CALLBACK, callback,
0);
}
}
static void cl_zhemm_cpu_func(void *descr[], void *cl_arg)
{
MORSE_enum side;
MORSE_enum uplo;
int M;
int N;
MORSE_Complex64_t alpha;
MORSE_Complex64_t *A;
int LDA;
MORSE_Complex64_t *B;
int LDB;
MORSE_Complex64_t beta;
MORSE_Complex64_t *C;
int LDC;
A = (MORSE_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]);
B = (MORSE_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[1]);
C = (MORSE_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[2]);
starpu_codelet_unpack_args(cl_arg, &side, &uplo, &M, &N, &alpha, &LDA, &LDB, &beta, &LDC);
cblas_zhemm(
CblasColMajor,
(CBLAS_SIDE)side, (CBLAS_UPLO)uplo,
M, N,
CBLAS_SADDR(alpha), A, LDA,
B, LDB,
CBLAS_SADDR(beta), C, LDC);
}
#ifdef CHAMELEON_USE_CUDA
#if defined(CHAMELEON_USE_CUBLAS_V2)
static void cl_zhemm_cuda_func(void *descr[], void *cl_arg)
{
MORSE_enum side;
MORSE_enum uplo;
int M;
int N;
cuDoubleComplex alpha;
const cuDoubleComplex *A;
int LDA;
const cuDoubleComplex *B;
int LDB;
cuDoubleComplex beta;
cuDoubleComplex *C;
int LDC;
CUstream stream;
cublasHandle_t handle;
cublasStatus_t stat;
cublasSideMode_t cublasSide;
cublasFillMode_t cublasUplo;
A = (const cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[0]);
B = (const cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[1]);
C = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[2]);
starpu_codelet_unpack_args(cl_arg, &side, &uplo, &M, &N, &alpha, &LDA, &LDB, &beta, &LDC);
if (stat != CUBLAS_STATUS_SUCCESS) {
printf ("CUBLAS initialization failed\n");
assert( stat == CUBLAS_STATUS_SUCCESS );
}
stream = starpu_cuda_get_local_stream();
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
stat = cublasSetStream(handle, stream);
if (stat != CUBLAS_STATUS_SUCCESS) {
printf ("cublasSetStream failed\n");
assert( stat == CUBLAS_STATUS_SUCCESS );
}
if (side == MorseLeft){
cublasSide = CUBLAS_SIDE_LEFT;
}else if (side == MorseRight){
cublasSide = CUBLAS_SIDE_RIGHT;
}else{
fprintf(stderr, "Error in cl_zhemm_cuda_func: bad side parameter %d\n", side);
}
if (uplo == MorseUpper){
cublasUplo = CUBLAS_FILL_MODE_UPPER;
}else if(uplo == MorseLower){
cublasUplo = CUBLAS_FILL_MODE_LOWER;
}else if(uplo == MorseUpperLower){
cublasUplo = 0;
}else{
fprintf(stderr, "Error in cl_zhemm_cuda_func: bad uplo parameter %d\n", uplo);
}
stat = cublasZhemm(handle,
cublasSide, cublasUplo,
M, N,
(const cuDoubleComplex *) &alpha, A, LDA,
B, LDB,
(const cuDoubleComplex *) &beta, C, LDC);
if (stat != CUBLAS_STATUS_SUCCESS){
printf ("cublasZhemm failed");
cublasDestroy(handle);
assert( stat == CUBLAS_STATUS_SUCCESS );
}
cublasDestroy(handle);
#ifndef STARPU_CUDA_ASYNC
cudaStreamSynchronize( stream );
#endif
return;
}
#else /* CHAMELEON_USE_CUBLAS_V2 */
static void cl_zhemm_cuda_func(void *descr[], void *cl_arg)
{
MORSE_enum side;
MORSE_enum uplo;
int M;
int N;
cuDoubleComplex alpha;
cuDoubleComplex *A;
int LDA;
cuDoubleComplex *B;
int LDB;
cuDoubleComplex beta;
cuDoubleComplex *C;
int LDC;
CUstream stream;
A = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[0]);
B = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[1]);
C = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[2]);
starpu_codelet_unpack_args(cl_arg, &side, &uplo, &M, &N, &alpha, &LDA, &LDB, &beta, &LDC);
stream = starpu_cuda_get_local_stream();
cublasSetKernelStream( stream );
cublasZhemm(
morse_lapack_const(side), morse_lapack_const(uplo),
M, N,
alpha, A, LDA,
B, LDB,
beta, C, LDC);
#ifndef STARPU_CUDA_ASYNC
cudaStreamSynchronize( stream );
#endif
return;
}
#endif /* CHAMELEON_USE_CUBLAS_V2 */
#endif /* CHAMELEON_USE_CUDA */
/*
* Codelet definition
*/
CODELETS(zhemm, 3, cl_zhemm_cpu_func, cl_zhemm_cuda_func, STARPU_CUDA_ASYNC)