Newer
Older
/**
*
* @copyright (c) 2009-2014 The University of Tennessee and The University
* of Tennessee Research Foundation.
* All rights reserved.
* @copyright (c) 2012-2014 Inria. All rights reserved.
THIBAULT Samuel
committed
* @copyright (c) 2012-2014, 2016 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, Univ. Bordeaux. All rights reserved.
*
**/
/**
*
* @file codelet_zgetrf_nopiv.c
*
* MORSE codelets kernel
* MORSE is a software package provided by Univ. of Tennessee,
* Univ. of California Berkeley and Univ. of Colorado Denver
*
* @version 2.6.0
* @author Omar Zenati
* @author Mathieu Faverge
* @author Emmanuel Agullo
* @author Cedric Castagnede
* @date 2013-02-01
* @precisions normal z -> c d s
*
**/

PRUVOST Florent
committed
#include "runtime/starpu/include/morse_starpu.h"

PRUVOST Florent
committed
#include "runtime/starpu/include/runtime_codelet_z.h"
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
/**
*
* @ingroup CORE_MORSE_Complex64_t
*
* CORE_zgetrf_nopiv computes an LU factorization of a general diagonally
* dominant M-by-N matrix A without pivoting.
*
* The factorization has the form
* A = L * U
* where L is lower triangular with unit
* diagonal elements (lower trapezoidal if m > n), and U is upper
* triangular (upper trapezoidal if m < n).
*
* This is the right-looking Level 3 BLAS version of the algorithm.
* WARNING: Your matrix needs to be diagonally dominant if you want to call
* this routine safely.
*
*******************************************************************************
*
* @param[in] M
* The number of rows of the matrix A. M >= 0.
*
* @param[in] N
* The number of columns of the matrix A. N >= 0.
*
* @param[in] IB
* The block size to switch between blocked and unblocked code.
*
* @param[in,out] A
* On entry, the M-by-N matrix to be factored.
* On exit, the factors L and U from the factorization
* A = L*U; the unit diagonal elements of L are not stored.
*
* @param[in] LDA
* The leading dimension of the array A. LDA >= max(1,M).
*
*******************************************************************************
*
* @return
* \retval MORSE_SUCCESS successful exit
* \retval <0 if INFO = -k, the k-th argument had an illegal value
* \retval >0 if INFO = k, U(k,k) is exactly zero. The factorization
* has been completed, but the factor U is exactly
* singular, and division by zero will occur if it is used
* to solve a system of equations.
*
******************************************************************************/
void MORSE_TASK_zgetrf_nopiv(const MORSE_option_t *options,
int m, int n, int ib, int nb,
const MORSE_desc_t *A, int Am, int An, int lda,
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
int iinfo)
{
(void)nb;
struct starpu_codelet *codelet = &cl_zgetrf_nopiv;
void (*callback)(void*) = options->profiling ? cl_zgetrf_nopiv_callback : NULL;
if ( morse_desc_islocal( A, Am, An ) )
{
starpu_insert_task(
codelet,
STARPU_VALUE, &m, sizeof(int),
STARPU_VALUE, &n, sizeof(int),
STARPU_VALUE, &ib, sizeof(int),
STARPU_RW, RTBLKADDR(A, MORSE_Complex64_t, Am, An),
STARPU_VALUE, &lda, sizeof(int),
STARPU_VALUE, &iinfo, sizeof(int),
STARPU_PRIORITY, options->priority,
STARPU_CALLBACK, callback,
0);
}
}
/*
* Codelet CPU
*/
/*
 * CPU implementation of the zgetrf_nopiv codelet.
 *
 * descr[0] is the StarPU matrix handle of the tile to factor; cl_arg holds
 * the scalars packed at task insertion (m, n, ib, lda, iinfo, in that order).
 * The status written by CORE_zgetrf_nopiv() and the unpacked iinfo value are
 * not propagated anywhere by this function.
 */
static void cl_zgetrf_nopiv_cpu_func(void *descr[], void *cl_arg)
{
    int rows, cols, inner_blk, ld, info_offset;
    int status = 0;
    MORSE_Complex64_t *tile;

    /* Every packed scalar is unpacked, even the ones this function does
     * not use afterwards. */
    starpu_codelet_unpack_args(cl_arg, &rows, &cols, &inner_blk, &ld, &info_offset);
    tile = (MORSE_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]);

    CORE_zgetrf_nopiv(rows, cols, inner_blk, tile, ld, &status);
}
/*
* Codelet GPU
*/
#if defined(CHAMELEON_USE_MAGMA)
/*
 * CUDA implementation of the zgetrf_nopiv codelet.
 *
 * descr[0] is the StarPU matrix handle of the tile to factor; cl_arg holds
 * the scalars packed at task insertion (m, n, ib, lda, iinfo, in that order).
 * ib and iinfo are unpacked but not used by this implementation, and the
 * status written by CUDA_zgetrf_nopiv() is not propagated.
 */
static void cl_zgetrf_nopiv_cuda_func(void *descr[], void *cl_arg)
{
    int m;
    int n;
    int ib;
    cuDoubleComplex *dA;
    int lda;
    int iinfo;
    int info = 0;

    starpu_codelet_unpack_args(cl_arg, &m, &n, &ib, &lda, &iinfo);
    dA = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[0]);

    CUDA_zgetrf_nopiv( m, n, dA, lda, &info );

    /* cudaThreadSynchronize() is deprecated in the CUDA runtime API;
     * cudaDeviceSynchronize() is its documented replacement. */
    cudaDeviceSynchronize();
}
#endif
/*
* Codelet definition
*/
THIBAULT Samuel
committed
/*
 * Instantiate the zgetrf_nopiv codelet. The CODELETS / CODELETS_CPU macros
 * are defined outside this file; presumably they produce the cl_zgetrf_nopiv
 * object and the cl_zgetrf_nopiv_callback used by MORSE_TASK_zgetrf_nopiv
 * (TODO confirm against the macro definitions).
 *
 * NOTE(review): the first branch references cl_zgetrf_nopiv_cuda_func, which
 * this file only defines under CHAMELEON_USE_MAGMA, yet the branch is also
 * taken for CHAMELEON_SIMULATION_MAGMA -- confirm the macro copes with that.
 */
#if defined(CHAMELEON_USE_MAGMA) || defined(CHAMELEON_SIMULATION_MAGMA)
CODELETS(zgetrf_nopiv, 1, cl_zgetrf_nopiv_cpu_func, cl_zgetrf_nopiv_cuda_func, 0)
#else
/* No MAGMA support: only the CPU implementation is registered. */
CODELETS_CPU(zgetrf_nopiv, 1, cl_zgetrf_nopiv_cpu_func)
#endif