Newer
Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
/**
*
* @copyright (c) 2009-2014 The University of Tennessee and The University
* of Tennessee Research Foundation.
* All rights reserved.
* @copyright (c) 2012-2014 Inria. All rights reserved.
* @copyright (c) 2012-2014 IPB. All rights reserved.
*
**/
/**
*
* @file codelet_zgetrf_nopiv.c
*
* MORSE codelets kernel
* MORSE is a software package provided by Univ. of Tennessee,
* Univ. of California Berkeley and Univ. of Colorado Denver
*
* @version 2.6.0
* @author Omar Zenati
* @author Mathieu Faverge
* @author Emmanuel Agullo
* @author Cedric Castagnede
* @date 2013-02-01
* @precisions normal z -> c d s
*
**/
#include "morse_starpu.h"
#include "codelet_z.h"
/**
*
* @ingroup CORE_MORSE_Complex64_t
*
* CORE_zgetrf_nopiv computes an LU factorization of a general diagonally
* dominant M-by-N matrix A without pivoting.
*
* The factorization has the form
* A = L * U
* where L is lower triangular with unit
* diagonal elements (lower trapezoidal if m > n), and U is upper
* triangular (upper trapezoidal if m < n).
*
* This is the right-looking Level 3 BLAS version of the algorithm.
* WARNING: Your matrix needs to be diagonally dominant if you want to call this
* routine safely.
*
*******************************************************************************
*
* @param[in] M
* The number of rows of the matrix A. M >= 0.
*
* @param[in] N
* The number of columns of the matrix A. N >= 0.
*
* @param[in] IB
* The block size to switch between blocked and unblocked code.
*
* @param[in,out] A
* On entry, the M-by-N matrix to be factored.
* On exit, the factors L and U from the factorization
* A = L*U; the unit diagonal elements of L are not stored.
*
* @param[in] LDA
* The leading dimension of the array A. LDA >= max(1,M).
*
*******************************************************************************
*
* @return
* \retval MORSE_SUCCESS successful exit
* \retval <0 if INFO = -k, the k-th argument had an illegal value
* \retval >0 if INFO = k, U(k,k) is exactly zero. The factorization
* has been completed, but the factor U is exactly
* singular, and division by zero will occur if it is used
* to solve a system of equations.
*
******************************************************************************/
/**
 * Task-insertion wrapper: submits the zgetrf_nopiv codelet to StarPU for
 * the tile (Am, An) of descriptor A.
 *
 * The scalars m, n, ib, lda and iinfo are packed by value, in that order,
 * around the tile data, which is passed with STARPU_RW access. The task
 * is only inserted when morse_desc_islocal() is true for the tile; otherwise
 * the function does nothing. The profiling callback is attached only when
 * options->profiling is set. nb is accepted but not used.
 */
void MORSE_TASK_zgetrf_nopiv(MORSE_option_t *options,
                             int m, int n, int ib, int nb,
                             MORSE_desc_t *A, int Am, int An, int lda,
                             int iinfo)
{
    struct starpu_codelet *cl = &cl_zgetrf_nopiv;
    void (*profiling_cb)(void*);

    /* No callback at all unless profiling has been requested. */
    if ( options->profiling ) {
        profiling_cb = cl_zgetrf_nopiv_callback;
    }
    else {
        profiling_cb = NULL;
    }

    /* Kept in the signature for the callers; silence the unused warning. */
    (void)nb;

    if ( morse_desc_islocal( A, Am, An ) ) {
        /* The STARPU_VALUE order below must match the unpack order used
         * by the codelet implementations (m, n, ib, lda, iinfo). */
        starpu_insert_task(
            cl,
            STARPU_VALUE,    &m,     sizeof(int),
            STARPU_VALUE,    &n,     sizeof(int),
            STARPU_VALUE,    &ib,    sizeof(int),
            STARPU_RW,       RTBLKADDR(A, MORSE_Complex64_t, Am, An),
            STARPU_VALUE,    &lda,   sizeof(int),
            STARPU_VALUE,    &iinfo, sizeof(int),
            STARPU_PRIORITY, options->priority,
            STARPU_CALLBACK, profiling_cb,
            0);
    }
}
/*
* Codelet CPU
*/
/**
 * CPU implementation of the zgetrf_nopiv codelet.
 *
 * descr[0] supplies the matrix pointer; cl_arg holds the packed scalars,
 * unpacked in the order m, n, ib, lda, iinfo. The factorization itself is
 * delegated to CORE_zgetrf_nopiv().
 *
 * Note: neither the status written by the kernel nor iinfo is read after
 * the call, so a non-zero kernel status is not reported from here.
 */
static void cl_zgetrf_nopiv_cpu_func(void *descr[], void *cl_arg)
{
    int rows, cols, inner_blk;
    int ld, info_offset;
    int status = 0;
    MORSE_Complex64_t *tile;

    starpu_codelet_unpack_args(cl_arg, &rows, &cols, &inner_blk, &ld, &info_offset);
    tile = (MORSE_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]);

    CORE_zgetrf_nopiv(rows, cols, inner_blk, tile, ld, &status);
}
/*
* Codelet GPU
*/
#if defined(MAGMAMORSE_USE_MAGMA)
/**
 * CUDA implementation of the zgetrf_nopiv codelet, backed by MAGMA.
 *
 * descr[0] supplies the matrix pointer, which is handed to
 * magma_zgetrf_nopiv_gpu() as a cuDoubleComplex array; cl_arg holds the
 * packed scalars, unpacked in the order m, n, ib, lda, iinfo.
 *
 * ib and iinfo are unpacked only to consume the argument buffer: the MAGMA
 * routine called here takes no inner block size, and the status it writes
 * to info is not read afterwards.
 */
static void cl_zgetrf_nopiv_cuda_func(void *descr[], void *cl_arg)
{
    int m;
    int n;
    int ib;
    cuDoubleComplex *dA;
    int lda;
    int iinfo;
    int info = 0;

    starpu_codelet_unpack_args(cl_arg, &m, &n, &ib, &lda, &iinfo);
    dA = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[0]);

    magma_zgetrf_nopiv_gpu( m, n, dA, lda, &info );

    /* Block until the device work has finished before returning.
     * cudaDeviceSynchronize() is the documented replacement for the
     * deprecated cudaThreadSynchronize(). */
    cudaDeviceSynchronize();
}
#endif
/*
* Codelet definition
*/
/* NOTE(review): CODELETS and CODELETS_CPU are macros defined outside this
 * file. Presumably they create the cl_zgetrf_nopiv codelet and the
 * cl_zgetrf_nopiv_callback symbol referenced by MORSE_TASK_zgetrf_nopiv,
 * and the second argument (1) is the number of data buffers - confirm
 * against the macro definitions. */
#if defined(MAGMAMORSE_USE_MAGMA)
/* MAGMA build: both the CPU and the CUDA implementations are passed. */
CODELETS(zgetrf_nopiv, 1, cl_zgetrf_nopiv_cpu_func, cl_zgetrf_nopiv_cuda_func, 0)
#else
/* Build without MAGMA: only the CPU implementation is passed. */
CODELETS_CPU(zgetrf_nopiv, 1, cl_zgetrf_nopiv_cpu_func)
#endif