-
Mathieu Faverge authored
codelet_zcallback.c 9.61 KiB
/**
*
* @file starpu/codelet_zcallback.c
*
* @copyright 2009-2014 The University of Tennessee and The University of
* Tennessee Research Foundation. All rights reserved.
* @copyright 2012-2024 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
* Univ. Bordeaux. All rights reserved.
*
***
*
* @brief Chameleon zcallback StarPU codelet
*
* @version 1.3.0
* @author Mathieu Faverge
* @author Cedric Augonnet
* @author Florent Pruvost
* @author Alycia Lisito
* @date 2023-07-06
* @precisions normal z -> c d s
*
*/
#include "chameleon_starpu.h"
#include "runtime_codelet_z.h"
/*
 * CHAMELEON_CL_CB entry for the dlag2z kernel. It is compiled only when
 * PRECISION_z or PRECISION_c is defined.
 * Both size arguments are read from task->handles[1]; the third size slot is 0
 * and the last argument is M*N.
 * NOTE(review): M, N and K are not defined in this file -- presumably the
 * macro binds its 2nd, 3rd and 4th arguments to them and evaluates the last
 * argument as an operation-count estimate; confirm against the macro
 * definition.
 */
#if defined(PRECISION_z) || defined(PRECISION_c)
CHAMELEON_CL_CB(dlag2z, cti_handle_get_m(task->handles[1]), cti_handle_get_n(task->handles[1]), 0, M*N)
#endif
/*
 * CHAMELEON_CL_CB entries for dzasum, zaxpy, zgeadd, ztradd and zlascal.
 * Each entry passes: kernel name, three size expressions, and a final
 * expression written in terms of M, N (and K).
 * NOTE(review): M, N and K are not defined in this file -- presumably the
 * macro binds the three size expressions to them, in order; confirm against
 * the macro definition.
 * All sizes here are read from task->handles[0], except where noted.
 */
CHAMELEON_CL_CB(dzasum, cti_handle_get_m(task->handles[0]), cti_handle_get_n(task->handles[0]), 0, M*N)
/* zaxpy: the second size is the m of handles[1] (not an n); the final expression only uses M. */
CHAMELEON_CL_CB(zaxpy, cti_handle_get_m(task->handles[0]), cti_handle_get_m(task->handles[1]), 0, M)
CHAMELEON_CL_CB(zgeadd, cti_handle_get_m(task->handles[0]), cti_handle_get_n(task->handles[0]), 0, M*N)
/* ztradd: half of the M*N product used by zgeadd above. */
CHAMELEON_CL_CB(ztradd, cti_handle_get_m(task->handles[0]), cti_handle_get_n(task->handles[0]), 0, M*N/2.)
CHAMELEON_CL_CB(zlascal, cti_handle_get_m(task->handles[0]), cti_handle_get_n(task->handles[0]), 0, M*N)
/*
 * CHAMELEON_CL_CB entry for the zgelqt kernel; both sizes are read from
 * task->handles[0] and the third size slot is the literal 0.
 *
 * The final expression used to be (4./3.)*M*N*K. Assuming the macro binds its
 * 2nd/3rd/4th arguments to M/N/K (the definition is not visible in this file;
 * confirm), K is always 0 here, so the product was zero whatever the tile
 * size. Use the transposed counterpart of the zgeqrt entry, (4./3.)*M*M*N,
 * i.e. (4./3.)*M*N*N: it no longer depends on K and matches the zgeqrt value
 * on square tiles.
 */
CHAMELEON_CL_CB(zgelqt, cti_handle_get_m(task->handles[0]), cti_handle_get_n(task->handles[0]), 0, (4./3.)*M*N*N)
/*
 * CHAMELEON_CL_CB entries for zgemv, zgemm and zgeqrt.
 * Each entry passes: kernel name, three size expressions, and a final
 * expression written in terms of M, N (and K).
 * NOTE(review): M, N and K are not defined in this file -- presumably the
 * macro binds the three size expressions to them, in order; confirm against
 * the macro definition.
 */
CHAMELEON_CL_CB(zgemv, cti_handle_get_m(task->handles[0]), cti_handle_get_n(task->handles[0]), 0, 2. *M*N )
/* zgemm: the first two sizes come from handles[2], the third is the n of handles[0]. */
CHAMELEON_CL_CB(zgemm, cti_handle_get_m(task->handles[2]), cti_handle_get_n(task->handles[2]), cti_handle_get_n(task->handles[0]), 2. *M*N*K) /* If A^t, computation is wrong */
/* zgeqrt: the third size slot is 0 and the final expression does not use K. */
CHAMELEON_CL_CB(zgeqrt, cti_handle_get_m(task->handles[0]), cti_handle_get_n(task->handles[0]), 0, (4./3.)*M*M*N)
/*
 * CHAMELEON_CL_CB entries for zgessm, zgessq, the zgetrf variants and zgesum.
 * Every size expression in this group calls cti_handle_get_m(), so all the
 * sizes of a given entry are equal (the m of a single handle).
 * NOTE(review): M, N and K are not defined in this file -- presumably the
 * macro binds the three size expressions to them, in order; confirm against
 * the macro definition.
 */
/* zgessm: all three sizes are the m of handles[2]. */
CHAMELEON_CL_CB(zgessm, cti_handle_get_m(task->handles[2]), cti_handle_get_m(task->handles[2]), cti_handle_get_m(task->handles[2]), 2. *M*N*K)
/* NOTE(review): the second size is get_m, not get_n, of handles[0] -- confirm this is intended. */
CHAMELEON_CL_CB(zgessq, cti_handle_get_m(task->handles[0]), cti_handle_get_m(task->handles[0]), 0, 4.*M*N)
/* The three zgetrf variants share the same sizes and the same final expression. */
CHAMELEON_CL_CB(zgetrf, cti_handle_get_m(task->handles[0]), cti_handle_get_m(task->handles[0]), cti_handle_get_m(task->handles[0]), (2./3.)*M*N*K)
CHAMELEON_CL_CB(zgetrf_incpiv, cti_handle_get_m(task->handles[0]), cti_handle_get_m(task->handles[0]), cti_handle_get_m(task->handles[0]), (2./3.)*M*N*K)
CHAMELEON_CL_CB(zgetrf_nopiv, cti_handle_get_m(task->handles[0]), cti_handle_get_m(task->handles[0]), cti_handle_get_m(task->handles[0]), (2./3.)*M*N*K)
/* NOTE(review): the second size is get_m, not get_n, of handles[0] -- confirm this is intended. */
CHAMELEON_CL_CB(zgesum, cti_handle_get_m(task->handles[0]), cti_handle_get_m(task->handles[0]), 0, M*N)
/*
 * CHAMELEON_CL_CB entries for zcesca, zgram, zhe2ge and zherfb.
 * The third size slot is 0 for all four entries.
 * NOTE(review): M, N and K are not defined in this file -- presumably the
 * macro binds the three size expressions to them, in order; confirm against
 * the macro definition.
 */
/* zcesca: sizes are read from handles[5]. */
CHAMELEON_CL_CB(zcesca, cti_handle_get_m(task->handles[5]), cti_handle_get_n(task->handles[5]), 0, 8.*M*N)
/* zgram: sizes are read from handles[3]; same final expression as zcesca. */
CHAMELEON_CL_CB(zgram, cti_handle_get_m(task->handles[3]), cti_handle_get_n(task->handles[3]), 0, 8.*M*N)
CHAMELEON_CL_CB(zhe2ge, cti_handle_get_m(task->handles[0]), cti_handle_get_n(task->handles[0]), 0, (1./2.0)*M*N)
/* zherfb: the n of handles[0] is passed, but the final expression only uses M. */
CHAMELEON_CL_CB(zherfb, cti_handle_get_m(task->handles[0]), cti_handle_get_n(task->handles[0]), 0, 2. *M* M*M)
/*
 * CHAMELEON_CL_CB entries for zhemm, zher2k and zherk. They are compiled only
 * when PRECISION_z or PRECISION_c is defined.
 * The final expressions are identical to those of the zsymm, zsyr2k and zsyrk
 * entries of this file, respectively.
 * NOTE(review): M, N and K are not defined in this file -- presumably the
 * macro binds the three size expressions to them, in order; confirm against
 * the macro definition.
 */
#if defined(PRECISION_z) || defined(PRECISION_c)
/* zhemm: sizes are read from handles[2]. */
CHAMELEON_CL_CB(zhemm, cti_handle_get_m(task->handles[2]), cti_handle_get_n(task->handles[2]), 0, 2.*M*M *N)
CHAMELEON_CL_CB(zher2k, cti_handle_get_m(task->handles[0]), cti_handle_get_n(task->handles[0]), 0, ( 1.+2.*M*N)*M)
CHAMELEON_CL_CB(zherk, cti_handle_get_m(task->handles[0]), cti_handle_get_n(task->handles[0]), 0, ( 1.+ M)*M*N)
#endif
/*
 * CHAMELEON_CL_CB entries for zlacpy, zlacpyx, zlange, zlaset, zlaset2,
 * zlatro and zlauum. All sizes are read from task->handles[0].
 * The first six entries use M*N as their final expression.
 * NOTE(review): M, N and K are not defined in this file -- presumably the
 * macro binds the three size expressions to them, in order; confirm against
 * the macro definition.
 */
CHAMELEON_CL_CB(zlacpy, cti_handle_get_m(task->handles[0]), cti_handle_get_n(task->handles[0]), 0, M*N)
CHAMELEON_CL_CB(zlacpyx, cti_handle_get_m(task->handles[0]), cti_handle_get_n(task->handles[0]), 0, M*N)
CHAMELEON_CL_CB(zlange, cti_handle_get_m(task->handles[0]), cti_handle_get_n(task->handles[0]), 0, M*N)
CHAMELEON_CL_CB(zlaset, cti_handle_get_m(task->handles[0]), cti_handle_get_n(task->handles[0]), 0, M*N)
CHAMELEON_CL_CB(zlaset2, cti_handle_get_m(task->handles[0]), cti_handle_get_n(task->handles[0]), 0, M*N)
CHAMELEON_CL_CB(zlatro, cti_handle_get_m(task->handles[0]), cti_handle_get_n(task->handles[0]), 0, M*N)
/* zlauum: all three sizes are the m of handles[0]; the final expression only uses M. */
CHAMELEON_CL_CB(zlauum, cti_handle_get_m(task->handles[0]), cti_handle_get_m(task->handles[0]), cti_handle_get_m(task->handles[0]), (1./3.)*M* M*M)
/*
 * CHAMELEON_CL_CB entries for zplghe and zsytrf_nopiv. They are compiled only
 * when PRECISION_z or PRECISION_c is defined.
 * NOTE(review): M, N and K are not defined in this file -- presumably the
 * macro binds the three size expressions to them, in order; confirm against
 * the macro definition.
 */
#if defined(PRECISION_z) || defined(PRECISION_c)
CHAMELEON_CL_CB(zplghe, cti_handle_get_m(task->handles[0]), cti_handle_get_n(task->handles[0]), 0, M*N)
/* zsytrf_nopiv: only the first size is read from a handle; the final expression only uses M. */
CHAMELEON_CL_CB(zsytrf_nopiv, cti_handle_get_m(task->handles[0]), 0, 0, (1./3.)*M* M*M)
#endif
/*
 * CHAMELEON_CL_CB entries for zplgsy, zplrnt, zbuild, zplssq, zplssq2 and
 * zpotrf. All sizes are read from task->handles[0].
 * NOTE(review): M, N and K are not defined in this file -- presumably the
 * macro binds the three size expressions to them, in order; confirm against
 * the macro definition.
 */
CHAMELEON_CL_CB(zplgsy, cti_handle_get_m(task->handles[0]), cti_handle_get_n(task->handles[0]), 0, M*N)
CHAMELEON_CL_CB(zplrnt, cti_handle_get_m(task->handles[0]), cti_handle_get_n(task->handles[0]), 0, M*N)
CHAMELEON_CL_CB(zbuild, cti_handle_get_m(task->handles[0]), cti_handle_get_n(task->handles[0]), 0, M*N)
CHAMELEON_CL_CB(zplssq, cti_handle_get_m(task->handles[0]), cti_handle_get_n(task->handles[0]), 0, M*N)
/* zplssq2: the final expression only uses N and has an integer constant (2, not 2.). */
CHAMELEON_CL_CB(zplssq2, cti_handle_get_m(task->handles[0]), cti_handle_get_n(task->handles[0]), 0, 2*N)
/* zpotrf: only the first size is read from a handle; the final expression only uses M. */
CHAMELEON_CL_CB(zpotrf, cti_handle_get_m(task->handles[0]), 0, 0, (1./3.)*M* M*M)
/*
 * CHAMELEON_CL_CB entries for zssssm, zsymm, zsyr2k, zsyrk and the
 * ztplqt / ztpqrt / ztpmlqt / ztpmqrt kernels.
 * NOTE(review): M, N and K are not defined in this file -- presumably the
 * macro binds the three size expressions to them, in order; confirm against
 * the macro definition.
 */
/* zssssm: all three sizes are the m of handles[0]; the final expression also reads the m of handles[2] directly. */
CHAMELEON_CL_CB(zssssm, cti_handle_get_m(task->handles[0]), cti_handle_get_m(task->handles[0]), cti_handle_get_m(task->handles[0]), M*M*(2.*M+cti_handle_get_m(task->handles[2])))
/* zsymm: sizes are read from handles[2]. */
CHAMELEON_CL_CB(zsymm, cti_handle_get_m(task->handles[2]), cti_handle_get_n(task->handles[2]), 0, 2.*M*M *N)
CHAMELEON_CL_CB(zsyr2k, cti_handle_get_m(task->handles[0]), cti_handle_get_n(task->handles[0]), 0, ( 1.+2.*M*N)*M)
CHAMELEON_CL_CB(zsyrk, cti_handle_get_m(task->handles[0]), cti_handle_get_n(task->handles[0]), 0, ( 1.+ M)*M*N)
/* ztplqt / ztpqrt: first two sizes from handles[1], third size is the m of handles[0]. */
CHAMELEON_CL_CB(ztplqt, cti_handle_get_m(task->handles[1]), cti_handle_get_n(task->handles[1]), cti_handle_get_m(task->handles[0]), 2.*M*N*K)
CHAMELEON_CL_CB(ztpqrt, cti_handle_get_m(task->handles[1]), cti_handle_get_n(task->handles[1]), cti_handle_get_m(task->handles[0]), 2.*M*N*K)
/* ztpmlqt / ztpmqrt: first two sizes from handles[3], third size is the m of handles[2]. */
CHAMELEON_CL_CB(ztpmlqt, cti_handle_get_m(task->handles[3]), cti_handle_get_n(task->handles[3]), cti_handle_get_m(task->handles[2]), 4.*M*N*K)
CHAMELEON_CL_CB(ztpmqrt, cti_handle_get_m(task->handles[3]), cti_handle_get_n(task->handles[3]), cti_handle_get_m(task->handles[2]), 4.*M*N*K)
/*
 * CHAMELEON_CL_CB entries for ztrasm, ztrmm, ztrsm, ztrtri, the
 * ztsmlq_hetra1 / ztsmqr_hetra1 pair, ztstrf, zunmlq and zunmqr.
 * NOTE(review): M, N and K are not defined in this file -- presumably the
 * macro binds the three size expressions to them, in order; confirm against
 * the macro definition.
 */
/* ztrasm: the n of handles[0] is passed, but the final expression only uses M. */
CHAMELEON_CL_CB(ztrasm, cti_handle_get_m(task->handles[0]), cti_handle_get_n(task->handles[0]), 0, 0.5*M*(M+1))
/* ztrmm / ztrsm: sizes are read from handles[1]; both share the same final expression. */
CHAMELEON_CL_CB(ztrmm, cti_handle_get_m(task->handles[1]), cti_handle_get_n(task->handles[1]), 0, M*M*N)
CHAMELEON_CL_CB(ztrsm, cti_handle_get_m(task->handles[1]), cti_handle_get_n(task->handles[1]), 0, M*M*N)
/* From here on, all three sizes of each entry are the m of handles[0]. */
CHAMELEON_CL_CB(ztrtri, cti_handle_get_m(task->handles[0]), cti_handle_get_m(task->handles[0]), cti_handle_get_m(task->handles[0]), (1./3.)*M *M*M)
/* The two *_hetra1 entries also read the m of handles[3] directly in the final expression. */
CHAMELEON_CL_CB(ztsmlq_hetra1, cti_handle_get_m(task->handles[0]), cti_handle_get_m(task->handles[0]), cti_handle_get_m(task->handles[0]), (4.0*M+cti_handle_get_m(task->handles[3]))*M*M)
CHAMELEON_CL_CB(ztsmqr_hetra1, cti_handle_get_m(task->handles[0]), cti_handle_get_m(task->handles[0]), cti_handle_get_m(task->handles[0]), (4.0*M+cti_handle_get_m(task->handles[3]))*M*M)
CHAMELEON_CL_CB(ztstrf, cti_handle_get_m(task->handles[0]), cti_handle_get_m(task->handles[0]), cti_handle_get_m(task->handles[0]), M* M*M)
CHAMELEON_CL_CB(zunmlq, cti_handle_get_m(task->handles[0]), cti_handle_get_m(task->handles[0]), cti_handle_get_m(task->handles[0]), 2. *M* M*M)
CHAMELEON_CL_CB(zunmqr, cti_handle_get_m(task->handles[0]), cti_handle_get_m(task->handles[0]), cti_handle_get_m(task->handles[0]), 2. *M* M*M)
/*
 * CHAMELEON_CL_CB entries for dlag2h and hlag2d. They are compiled only when
 * PRECISION_d or PRECISION_s is defined.
 * Both entries read their sizes from task->handles[0] and use M*N as their
 * final expression.
 * NOTE(review): M, N and K are not defined in this file -- presumably the
 * macro binds the three size expressions to them, in order; confirm against
 * the macro definition.
 */
#if defined(PRECISION_d) || defined(PRECISION_s)
CHAMELEON_CL_CB(dlag2h, cti_handle_get_m(task->handles[0]), cti_handle_get_n(task->handles[0]), 0, M*N)
CHAMELEON_CL_CB(hlag2d, cti_handle_get_m(task->handles[0]), cti_handle_get_n(task->handles[0]), 0, M*N)
#endif