-
PRUVOST Florent authored
change copyright - correct whitespace - place cmake module depending on chameleon in cmake_modules and no more in cmake_modules/morse
PRUVOST Florent authoredchange copyright - correct whitespace - place cmake module depending on chameleon in cmake_modules and no more in cmake_modules/morse
codelet_zttmlq.c 6.95 KiB
/**
*
* @copyright (c) 2009-2014 The University of Tennessee and The University
* of Tennessee Research Foundation.
* All rights reserved.
* @copyright (c) 2012-2014 Inria. All rights reserved.
* @copyright (c) 2012-2014 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, Univ. Bordeaux. All rights reserved.
*
**/
/**
*
* @file codelet_zttmlq.c
*
* MORSE codelets kernel
* MORSE is a software package provided by Univ. of Tennessee,
* Univ. of California Berkeley and Univ. of Colorado Denver
*
* @version 2.5.0
* @comment This file has been automatically generated
* from Plasma 2.5.0 for MORSE 1.0.0
* @author Hatem Ltaief
* @author Dulceneia Becker
* @author Mathieu Faverge
* @author Emmanuel Agullo
* @author Cedric Castagnede
* @date 2010-11-15
* @precisions normal z -> c d s
*
**/
#include "morse_starpu.h"
#include "codelet_z.h"
/**
*
* @ingroup CORE_MORSE_Complex64_t
*
* CORE_zttmlq overwrites the general complex M1-by-N1 tile A1 and
* M2-by-N2 tile A2 (N1 == N2) with
*
* SIDE = 'L' SIDE = 'R'
* TRANS = 'N': Q * | A1 | | A1 | * Q
* | A2 | | A2 |
*
* TRANS = 'C': Q**H * | A1 | | A1 | * Q**H
* | A2 | | A2 |
*
* where Q is a complex unitary matrix defined as the product of k
* elementary reflectors
*
* Q = H(1) H(2) . . . H(k)
*
* as returned by CORE_zttqrt.
*
*******************************************************************************
*
* @param[in] side
* @arg MorseLeft : apply Q or Q**H from the Left;
* @arg MorseRight : apply Q or Q**H from the Right.
*
* @param[in] trans
* @arg MorseNoTrans : No transpose, apply Q;
* @arg MorseConjTrans : ConjTranspose, apply Q**H.
*
* @param[in] M1
* The number of rows of the tile A1. M1 >= 0.
*
* @param[in] N1
* The number of columns of the tile A1. N1 >= 0.
*
* @param[in] M2
* The number of rows of the tile A2. M2 >= 0.
*
* @param[in] N2
* The number of columns of the tile A2. N2 >= 0.
*
* @param[in] K
* The number of elementary reflectors whose product defines
* the matrix Q.
*
* @param[in] IB
* The inner-blocking size. IB >= 0.
*
* @param[in,out] A1
* On entry, the M1-by-N1 tile A1.
* On exit, A1 is overwritten by the application of Q.
*
* @param[in] LDA1
* The leading dimension of the array A1. LDA1 >= max(1,M1).
*
* @param[in,out] A2
* On entry, the M2-by-N2 tile A2.
* On exit, A2 is overwritten by the application of Q.
*
* @param[in] LDA2
* The leading dimension of the tile A2. LDA2 >= max(1,M2).
*
* @param[in] V
* The i-th row must contain the vector which defines the
* elementary reflector H(i), for i = 1,2,...,k, as returned by
* CORE_ZTTQRT in the first k rows of its array argument V.
*
* @param[in] LDV
* The leading dimension of the array V. LDV >= max(1,K).
*
* @param[out] T
* The IB-by-N1 triangular factor T of the block reflector.
* T is upper triangular by block (economic storage);
* The rest of the array is not referenced.
*
* @param[in] LDT
* The leading dimension of the array T. LDT >= IB.
*
* @param[out] WORK
* Workspace array of size LDWORK-by-N1.
*
* @param[in] LDWORK
* The dimension of the array WORK. LDWORK >= max(1,IB).
*
*******************************************************************************
*
* @return
* \retval MORSE_SUCCESS successful exit
* \retval <0 if -i, the i-th argument had an illegal value
*
******************************************************************************/
void MORSE_TASK_zttmlq(MORSE_option_t *options,
MORSE_enum side, MORSE_enum trans,
int m1, int n1, int m2, int n2, int k, int ib, int nb,
MORSE_desc_t *A1, int A1m, int A1n, int lda1,
MORSE_desc_t *A2, int A2m, int A2n, int lda2,
MORSE_desc_t *V, int Vm, int Vn, int ldv,
MORSE_desc_t *T, int Tm, int Tn, int ldt)
{
(void)nb;
struct starpu_codelet *codelet = &cl_zttmlq;
void (*callback)(void*) = options->profiling ? cl_zttmlq_callback : NULL;
int ldwork = side == MorseLeft ? ib : nb;
if ( morse_desc_islocal( A1, A1m, A1n ) ||
morse_desc_islocal( A2, A2m, A2n ) ||
morse_desc_islocal( V, Vm, Vn ) ||
morse_desc_islocal( T, Tm, Tn ) )
{
starpu_insert_task(
codelet,
STARPU_VALUE, &side, sizeof(MORSE_enum),
STARPU_VALUE, &trans, sizeof(MORSE_enum),
STARPU_VALUE, &m1, sizeof(int),
STARPU_VALUE, &n1, sizeof(int),
STARPU_VALUE, &m2, sizeof(int),
STARPU_VALUE, &n2, sizeof(int),
STARPU_VALUE, &k, sizeof(int),
STARPU_VALUE, &ib, sizeof(int),
STARPU_RW, RTBLKADDR(A1, MORSE_Complex64_t, A1m, A1n),
STARPU_VALUE, &lda1, sizeof(int),
STARPU_RW, RTBLKADDR(A2, MORSE_Complex64_t, A2m, A2n),
STARPU_VALUE, &lda2, sizeof(int),
STARPU_R, RTBLKADDR(V, MORSE_Complex64_t, Vm, Vn),
STARPU_VALUE, &ldv, sizeof(int),
STARPU_R, RTBLKADDR(T, MORSE_Complex64_t, Tm, Tn),
STARPU_VALUE, &ldt, sizeof(int),
/* nb * ib */
STARPU_SCRATCH, options->ws_worker,
STARPU_VALUE, &ldwork, sizeof(int),
STARPU_PRIORITY, options->priority,
STARPU_CALLBACK, callback,
0);
}
}
static void cl_zttmlq_cpu_func(void *descr[], void *cl_arg)
{
MORSE_enum side;
MORSE_enum trans;
int m1;
int n1;
int m2;
int n2;
int k;
int ib;
MORSE_Complex64_t *A1;
int lda1;
MORSE_Complex64_t *A2;
int lda2;
MORSE_Complex64_t *V;
int ldv;
MORSE_Complex64_t *T;
int ldt;
MORSE_Complex64_t *WORK;
int ldwork;
A1 = (MORSE_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]);
A2 = (MORSE_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[1]);
V = (MORSE_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[2]);
T = (MORSE_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[3]);
WORK = (MORSE_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[4]); /* nb * ib */
starpu_codelet_unpack_args(cl_arg, &side, &trans, &m1, &n1, &m2, &n2, &k, &ib,
&lda1, &lda2, &ldv, &ldt, &ldwork);
CORE_zttmlq(side, trans, m1, n1, m2, n2, k, ib, A1, lda1,
A2, lda2, V, ldv, T, ldt, WORK, ldwork);
}
/*
* Codelet definition
*/
CODELETS_CPU(zttmlq, 5, cl_zttmlq_cpu_func)