/**
* @copyright 2009-2016 The University of Tennessee and The University
* of Tennessee Research Foundation.
* All rights reserved.
* @copyright 2012-2017 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
* Univ. Bordeaux. All rights reserved.
* MORSE codelets kernel
* MORSE is a software package provided by Univ. of Tennessee,
* Univ. of California Berkeley and Univ. of Colorado Denver
*
* @version 0.9.0
* @author Mathieu Faverge
* @date 2016-12-15
* @precisions normal z -> s d c
*
**/
#include "chameleon_starpu.h"
#include "runtime_codelet_z.h"
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
#if !defined(CHAMELEON_SIMULATION)
static void cl_ztpmqrt_cpu_func(void *descr[], void *cl_arg)
{
MORSE_enum side;
MORSE_enum trans;
int M;
int N;
int K;
int L;
int ib;
const MORSE_Complex64_t *V;
int ldv;
const MORSE_Complex64_t *T;
int ldt;
MORSE_Complex64_t *A;
int lda;
MORSE_Complex64_t *B;
int ldb;
MORSE_Complex64_t *WORK;
V = (const MORSE_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]);
T = (const MORSE_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[1]);
A = (MORSE_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[2]);
B = (MORSE_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[3]);
WORK = (MORSE_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[4]); /* ib * nb */
starpu_codelet_unpack_args( cl_arg, &side, &trans, &M, &N, &K, &L, &ib,
&ldv, &ldt, &lda, &ldb );
CORE_ztpmqrt( side, trans, M, N, K, L, ib,
V, ldv, T, ldt, A, lda, B, ldb, WORK );
}
#if defined(CHAMELEON_USE_CUDA)
static void cl_ztpmqrt_cuda_func(void *descr[], void *cl_arg)
{
MORSE_enum side;
MORSE_enum trans;
int M;
int N;
int K;
int L;
int ib;
const cuDoubleComplex *V;
int ldv;
const cuDoubleComplex *T;
int ldt;
cuDoubleComplex *A;
int lda;
cuDoubleComplex *B;
int ldb;
cuDoubleComplex *W;
V = (const cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[0]);
T = (const cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[1]);
A = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[2]);
B = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[3]);
W = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[4]); /* 2*ib*nb */
starpu_codelet_unpack_args( cl_arg, &side, &trans, &M, &N, &K, &L, &ib,
&ldv, &ldt, &lda, &ldb );
CUDA_ztpmqrt(
side, trans, M, N, K, L, ib,

Mathieu Faverge
committed
V, ldv, T, ldt, A, lda, B, ldb,
W, stream );
#ifndef STARPU_CUDA_ASYNC
cudaStreamSynchronize( stream );
#endif
}
#endif /* defined(CHAMELEON_USE_CUDA) */
#endif /* !defined(CHAMELEON_SIMULATION) */
/*
 * Codelet definition
 *
 * Declares the ztpmqrt codelet from its CPU and CUDA implementations. The
 * count 5 matches the five descr[] entries both implementations read
 * (V, T, A, B and the workspace).
 * NOTE(review): CODELETS is defined outside this file; presumably it
 * generates the cl_ztpmqrt and cl_ztpmqrt_callback symbols referenced by
 * MORSE_TASK_ztpmqrt -- confirm against runtime_codelet_z.h.
 */
CODELETS(ztpmqrt, 5, cl_ztpmqrt_cpu_func, cl_ztpmqrt_cuda_func, STARPU_CUDA_ASYNC)
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
/*
 * Submit one ztpmqrt task to StarPU.
 *
 * Tiles V(Vm,Vn) and T(Tm,Tn) are declared read-only; A(Am,An) and B(Bm,Bn)
 * are declared read-write. The scalar arguments are packed by value in the
 * order side, trans, M, N, K, L, ib, ldv, ldt, lda, ldb, which is the order
 * the codelet implementations unpack them in. options->ws_worker is attached
 * as a scratch buffer.
 *
 * The profiling callback is only installed when options->profiling is set.
 * When codelet names are enabled, the task is named "ztsmqr" if L == 0 and
 * "ztpmqrt" otherwise. nb is accepted but not used here.
 */
void
MORSE_TASK_ztpmqrt( const MORSE_option_t *options,
                    MORSE_enum side, MORSE_enum trans,
                    int M, int N, int K, int L, int ib, int nb,
                    const MORSE_desc_t *V, int Vm, int Vn, int ldv,
                    const MORSE_desc_t *T, int Tm, int Tn, int ldt,
                    const MORSE_desc_t *A, int Am, int An, int lda,
                    const MORSE_desc_t *B, int Bm, int Bn, int ldb )
{
    void (*profiling_cb)(void*) = ( !options->profiling ) ? NULL : cl_ztpmqrt_callback;
    struct starpu_codelet *task_cl = &cl_ztpmqrt;

    /* Declare how each tile is accessed by this task. */
    MORSE_BEGIN_ACCESS_DECLARATION;
    MORSE_ACCESS_R(V, Vm, Vn);
    MORSE_ACCESS_R(T, Tm, Tn);
    MORSE_ACCESS_RW(A, Am, An);
    MORSE_ACCESS_RW(B, Bm, Bn);
    MORSE_END_ACCESS_DECLARATION;

    starpu_insert_task(
        starpu_mpi_codelet(task_cl),
        /* Operation description */
        STARPU_VALUE,    &side,  sizeof(side),
        STARPU_VALUE,    &trans, sizeof(trans),
        STARPU_VALUE,    &M,     sizeof(M),
        STARPU_VALUE,    &N,     sizeof(N),
        STARPU_VALUE,    &K,     sizeof(K),
        STARPU_VALUE,    &L,     sizeof(L),
        STARPU_VALUE,    &ib,    sizeof(ib),
        /* Each tile handle is followed by its leading dimension */
        STARPU_R,        RTBLKADDR(V, MORSE_Complex64_t, Vm, Vn),
        STARPU_VALUE,    &ldv,   sizeof(ldv),
        STARPU_R,        RTBLKADDR(T, MORSE_Complex64_t, Tm, Tn),
        STARPU_VALUE,    &ldt,   sizeof(ldt),
        STARPU_RW,       RTBLKADDR(A, MORSE_Complex64_t, Am, An),
        STARPU_VALUE,    &lda,   sizeof(lda),
        STARPU_RW,       RTBLKADDR(B, MORSE_Complex64_t, Bm, Bn),
        STARPU_VALUE,    &ldb,   sizeof(ldb),
        /* Other options */
        STARPU_SCRATCH,  options->ws_worker,
        STARPU_PRIORITY, options->priority,
        STARPU_CALLBACK, profiling_cb,
#if defined(CHAMELEON_USE_MPI)
        STARPU_EXECUTE_ON_NODE, B->get_rankof(B, Bm, Bn),
#endif
#if defined(CHAMELEON_CODELETS_HAVE_NAME)
        STARPU_NAME, (( L == 0 ) ? "ztsmqr" : "ztpmqrt"),
#endif
        0);

    /* Silence unused-parameter warnings. */
    (void)ib;
    (void)nb;
}