Newer
Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
/**
*
* @copyright (c) 2009-2014 The University of Tennessee and The University
* of Tennessee Research Foundation.
* All rights reserved.
* @copyright (c) 2012-2014 Inria. All rights reserved.
* @copyright (c) 2012-2014 IPB. All rights reserved.
*
**/
/**
*
* @file codelet_zhemm.c
*
* MORSE codelets kernel
* MORSE is a software package provided by Univ. of Tennessee,
* Univ. of California Berkeley and Univ. of Colorado Denver
*
* @version 2.5.0
* @comment This file has been automatically generated
* from Plasma 2.5.0 for MORSE 1.0.0
* @author Hatem Ltaief
* @author Jakub Kurzak
* @author Mathieu Faverge
* @author Emmanuel Agullo
* @author Cedric Castagnede
* @date 2010-11-15
* @precisions normal z -> c
*
**/
#include "morse_starpu.h"
#include "codelet_z.h"
/**
 *
 * @ingroup CORE_MORSE_Complex64_t
 *
 * Submits one zhemm task to StarPU through starpu_insert_task().
 *
 * Nothing is submitted unless morse_desc_islocal() reports at least one of
 * the tiles A(Am,An), B(Bm,Bn) or C(Cm,Cn) as local.
 *
 * The tiles of A and B are registered read-only (STARPU_R) and the tile of C
 * read-write (STARPU_RW). side, uplo, m, n, alpha, lda, ldb, beta and ldc are
 * packed by value, in that order; cl_zhemm_cpu_func and cl_zhemm_cuda_func
 * read them back in the same order with starpu_codelet_unpack_args().
 *
 * @param options  Supplies the task priority and the profiling switch; the
 *                 cl_zhemm_callback callback is attached only when
 *                 options->profiling is non-zero.
 * @param nb       Unused.
 *
 **/
void MORSE_TASK_zhemm(MORSE_option_t *options,
                      MORSE_enum side, MORSE_enum uplo,
                      int m, int n, int nb,
                      MORSE_Complex64_t alpha, MORSE_desc_t *A, int Am, int An, int lda,
                      MORSE_desc_t *B, int Bm, int Bn, int ldb,
                      MORSE_Complex64_t beta, MORSE_desc_t *C, int Cm, int Cn, int ldc)
{
    void (*on_done)(void*) = NULL;

    (void)nb;

    /* The completion callback is only wanted when profiling is switched on. */
    if ( options->profiling ) {
        on_done = cl_zhemm_callback;
    }

    /* No tile of this operation is local: nothing to submit. */
    if ( !morse_desc_islocal( A, Am, An ) &&
         !morse_desc_islocal( B, Bm, Bn ) &&
         !morse_desc_islocal( C, Cm, Cn ) )
    {
        return;
    }

    /* Keep the STARPU_VALUE order in sync with the unpack calls in the
     * CPU and CUDA implementations of this codelet. */
    starpu_insert_task(
        &cl_zhemm,
        STARPU_VALUE,    &side,   sizeof(MORSE_enum),
        STARPU_VALUE,    &uplo,   sizeof(MORSE_enum),
        STARPU_VALUE,    &m,      sizeof(int),
        STARPU_VALUE,    &n,      sizeof(int),
        STARPU_VALUE,    &alpha,  sizeof(MORSE_Complex64_t),
        STARPU_R,        RTBLKADDR(A, MORSE_Complex64_t, Am, An),
        STARPU_VALUE,    &lda,    sizeof(int),
        STARPU_R,        RTBLKADDR(B, MORSE_Complex64_t, Bm, Bn),
        STARPU_VALUE,    &ldb,    sizeof(int),
        STARPU_VALUE,    &beta,   sizeof(MORSE_Complex64_t),
        STARPU_RW,       RTBLKADDR(C, MORSE_Complex64_t, Cm, Cn),
        STARPU_VALUE,    &ldc,    sizeof(int),
        STARPU_PRIORITY, options->priority,
        STARPU_CALLBACK, on_done,
        0);
}
static void cl_zhemm_cpu_func(void *descr[], void *cl_arg)
{
MORSE_enum side;
MORSE_enum uplo;
int M;
int N;
MORSE_Complex64_t alpha;
MORSE_Complex64_t *A;
int LDA;
MORSE_Complex64_t *B;
int LDB;
MORSE_Complex64_t beta;
MORSE_Complex64_t *C;
int LDC;
A = (MORSE_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]);
B = (MORSE_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[1]);
C = (MORSE_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[2]);
starpu_codelet_unpack_args(cl_arg, &side, &uplo, &M, &N, &alpha, &LDA, &LDB, &beta, &LDC);
cblas_zhemm(
CblasColMajor,
(CBLAS_SIDE)side, (CBLAS_UPLO)uplo,
M, N,
CBLAS_SADDR(alpha), A, LDA,
B, LDB,
CBLAS_SADDR(beta), C, LDC);
}
#ifdef MAGMAMORSE_USE_CUDA
static void cl_zhemm_cuda_func(void *descr[], void *cl_arg)
{
MORSE_enum side;
MORSE_enum uplo;
int M;
int N;
cuDoubleComplex alpha;
cuDoubleComplex *A;
int LDA;
cuDoubleComplex *B;
int LDB;
cuDoubleComplex beta;
cuDoubleComplex *C;
int LDC;
A = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[0]);
B = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[1]);
C = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[2]);
starpu_codelet_unpack_args(cl_arg, &side, &uplo, &M, &N, &alpha, &LDA, &LDB, &beta, &LDC);
CUstream stream = starpu_cuda_get_local_stream();
cublasSetKernelStream( stream );
cublasZhemm(
lapack_const(side), lapack_const(uplo),
M, N,
alpha, A, LDA,
B, LDB,
beta, C, LDC);
#ifndef STARPU_CUDA_ASYNC
cudaStreamSynchronize( stream );
#endif
return;
}
#endif
/*
 * Codelet definition
 *
 * Invokes the CODELETS macro for "zhemm", passing it the CPU implementation
 * (cl_zhemm_cpu_func), the CUDA implementation (cl_zhemm_cuda_func) and the
 * STARPU_CUDA_ASYNC flag.
 *
 * NOTE(review): the macro body is not visible in this file. It presumably
 * provides the cl_zhemm codelet and the cl_zhemm_callback function that
 * MORSE_TASK_zhemm refers to, and the literal 3 presumably is the number of
 * data buffers (A, B, C) -- confirm against the CODELETS definition.
 * NOTE(review): cl_zhemm_cuda_func is only defined when MAGMAMORSE_USE_CUDA
 * is set; presumably the macro ignores that argument otherwise -- confirm.
 */
CODELETS(zhemm, 3, cl_zhemm_cpu_func, cl_zhemm_cuda_func, STARPU_CUDA_ASYNC)